From 873d71d9265423a34291e7760a51ea7eccb518ee Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Mon, 22 Nov 2021 09:10:14 -0500
Subject: [PATCH 01/33] Start adding preprocessors guards around openmp
 statements

---
 src/helpme.h       | 2 ++
 src/tensor_utils.h | 4 ++++
 2 files changed, 6 insertions(+)
diff --git a/src/helpme.h b/src/helpme.h
index a269c10..0de4c6d 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -817,7 +817,9 @@ class PMEInstance {
         // Exclude m=0 cell.
         int start = (nodeZero ? 1 : 0);
 // Writing the three nested loops in one allows for better load balancing in parallel.
+#ifdef _OPENMP
 #pragma omp parallel for reduction(+ : energy, Vxx, Vxy, Vyy, Vxz, Vyz, Vzz) num_threads(nThreads)
+#endif
         for (size_t yxz = start; yxz < nyxz; ++yxz) {
             size_t xz = yxz % nxz;
             short ky = yxz / nxz;
diff --git a/src/tensor_utils.h b/src/tensor_utils.h
index 807367f..97d043a 100644
--- a/src/tensor_utils.h
+++ b/src/tensor_utils.h
@@ -32,7 +32,9 @@ namespace helpme {
 template <typename Real>
 void permuteABCtoCBA(Real const *__restrict__ abcPtr, int const aDimension, int const bDimension, int const cDimension,
                      Real *__restrict__ cbaPtr, size_t nThreads = 1) {
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads)
+#endif
     for (int C = 0; C <= -1 + cDimension; ++C)
         for (int B = 0; B <= -1 + bDimension; ++B)
             for (int A = 0; A <= -1 + aDimension; ++A)
@@ -52,7 +54,9 @@ void permuteABCtoCBA(Real const *__restrict__ abcPtr, int const aDimension, int
 template <typename Real>
 void permuteABCtoACB(Real const *__restrict__ abcPtr, int const aDimension, int const bDimension, int const cDimension,
                      Real *__restrict__ acbPtr, size_t nThreads = 1) {
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads)
+#endif
     for (int A = 0; A <= -1 + aDimension; ++A)
         for (int C = 0; C <= -1 + cDimension; ++C)
             for (int B = 0; B <= -1 + bDimension; ++B)

From 62098c12cbbe7ab671060f6e6e0d32399d4979dd Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Mon, 22 Nov 2021 09:15:28 -0500
Subject: [PATCH 02/33] Add more guards

---
 src/helpme.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/helpme.h b/src/helpme.h
index 0de4c6d..f5017a9 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -926,7 +926,9 @@ class PMEInstance {
         // Exclude m=0 cell.
         int start = (nodeZero ? 1 : 0);
 // Writing the three nested loops in one allows for better load balancing in parallel.
+#ifdef _OPENMP
 #pragma omp parallel for reduction(+ : energy, Vxx, Vxy, Vyy, Vxz, Vyz, Vzz) num_threads(nThreads)
+#endif
         for (size_t yxz = start; yxz < nyxz; ++yxz) {
             size_t xz = yxz % nxz;
             short ky = yxz / nxz;
@@ -1085,7 +1087,9 @@ class PMEInstance {
         // Exclude m=0 cell.
         int start = (nodeZero ? 1 : 0);
 // Writing the three nested loops in one allows for better load balancing in parallel.
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads)
+#endif
         for (size_t yxz = start; yxz < nyxz; ++yxz) {
             size_t xz = yxz % nxz;
             short ky = yxz / nxz;

From ac907bca37f7dd3c8db20232640d71939ff6f3dc Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Mon, 22 Nov 2021 09:19:50 -0500
Subject: [PATCH 03/33] Add another guard

---
 src/helpme.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/helpme.h b/src/helpme.h
index f5017a9..35ce561 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -1546,9 +1546,9 @@ class PMEInstance {
 
         int nComponents = nCartesian(parameterAngMom);
         size_t numBA = (size_t)myGridDimensionB_ * myGridDimensionA_;
+#ifdef _OPENMP
 #pragma omp parallel num_threads(nThreads_)
         {
-#ifdef _OPENMP
             int threadID = omp_get_thread_num();
 #else
             int threadID = 0;
@@ -1565,7 +1565,9 @@ class PMEInstance {
                 spreadParametersImpl(atom, realGrid, nComponents, splineA, splineB, splineC, fractionalParameters,
                                      threadID);
             }
+#ifdef _OPENMP
         }
+#endif
         return realGrid;
     }
 

From 47226ff5a2ff9f3010b1ab35bd3583d35a1010fb Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Mon, 22 Nov 2021 09:25:39 -0500
Subject: [PATCH 04/33] 3 more guards

---
 src/helpme.h | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/helpme.h b/src/helpme.h
index 35ce561..bcf4ee0 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -1587,9 +1587,9 @@ class PMEInstance {
         gridAtomList_.resize(gridDimensionC_);
 
 // Classify atoms to their worker threads first, then construct splines for each thread
+#ifdef _OPENMP
 #pragma omp parallel num_threads(nThreads_)
         {
-#ifdef _OPENMP
             int threadID = omp_get_thread_num();
 #else
             int threadID = 0;
@@ -1630,8 +1630,9 @@ class PMEInstance {
                 }
             }
             numAtomsPerThread_[threadID] = myNumAtoms;
+#ifdef _OPENMP
         }
-
+#endif
         // We could intervene here and do some load balancing by inspecting the list.  Currently
         // the lazy approach of just assuming that the atoms are evenly distributed along c is used.
 
@@ -1650,9 +1651,9 @@ class PMEInstance {
             threadOffset[thread] = threadOffset[thread - 1] + numAtomsPerThread_[thread - 1];
         }
 
+#ifdef _OPENMP
 #pragma omp parallel num_threads(nThreads_)
         {
-#ifdef _OPENMP
             int threadID = omp_get_thread_num();
 #else
             int threadID = 0;
@@ -1685,12 +1686,13 @@ class PMEInstance {
                                                splineOrder_, splineDerivativeLevel);
                 }
             }
+#ifdef _OPENMP
         }
-
+#endif
 // Finally, find all of the splines that this thread will need to handle
+#ifdef _OPENMP
 #pragma omp parallel num_threads(nThreads_)
         {
-#ifdef _OPENMP
             int threadID = omp_get_thread_num();
 #else
             int threadID = 0;
@@ -1705,7 +1707,9 @@ class PMEInstance {
                 }
                 ++count;
             }
+#ifdef _OPENMP
         }
+#endif
     }
 
     /*!

From d7818e05636407367f1de5d1dea33a844e346e7f Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Mon, 22 Nov 2021 09:36:39 -0500
Subject: [PATCH 05/33] Need to keep the code blocks to avoid redeclaring the
 threadID variable.

---
 src/helpme.h | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/helpme.h b/src/helpme.h
index bcf4ee0..3fa077b 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -1592,6 +1592,7 @@ class PMEInstance {
         {
             int threadID = omp_get_thread_num();
 #else
+        {
             int threadID = 0;
 #endif
             for (size_t row = threadID; row < gridDimensionC_; row += nThreads_) {
@@ -1630,9 +1631,7 @@ class PMEInstance {
                 }
             }
             numAtomsPerThread_[threadID] = myNumAtoms;
-#ifdef _OPENMP
         }
-#endif
         // We could intervene here and do some load balancing by inspecting the list.  Currently
         // the lazy approach of just assuming that the atoms are evenly distributed along c is used.
 
@@ -1656,6 +1655,7 @@ class PMEInstance {
         {
             int threadID = omp_get_thread_num();
 #else
+        {
             int threadID = 0;
 #endif
             size_t entry = threadOffset[threadID];
@@ -1686,15 +1686,14 @@ class PMEInstance {
                                                splineOrder_, splineDerivativeLevel);
                 }
             }
-#ifdef _OPENMP
         }
-#endif
 // Finally, find all of the splines that this thread will need to handle
 #ifdef _OPENMP
 #pragma omp parallel num_threads(nThreads_)
         {
             int threadID = omp_get_thread_num();
 #else
+        {
             int threadID = 0;
 #endif
             auto &mySplineList = splinesPerThread_[threadID];
@@ -1707,9 +1706,7 @@ class PMEInstance {
                 }
                 ++count;
             }
-#ifdef _OPENMP
         }
-#endif
     }
 
     /*!
@@ -1924,15 +1921,17 @@ class PMEInstance {
         helpme::vector<Complex> buffer(nThreads_ * scratchRowDim);
 
 // A transform, with instant sort to CAB ordering for each local block
+#ifdef _OPENMP
 #pragma omp parallel num_threads(nThreads_)
         {
-#ifdef _OPENMP
             int threadID = omp_get_thread_num();
 #else
             int threadID = 0;
 #endif
             auto scratch = &buffer[threadID * scratchRowDim];
+#ifdef _OPENMP
 #pragma omp for
+#endif
             for (int c = 0; c < subsetOfCAlongA_; ++c) {
                 for (int b = 0; b < myGridDimensionB_; ++b) {
                     Real *gridPtr = realGrid + c * myGridDimensionB_ * gridDimensionA_ + b * gridDimensionA_;
@@ -1945,7 +1944,9 @@ class PMEInstance {
                     }
                 }
             }
+#ifdef _OPENMP
         }
+#endif
 
 #if HAVE_MPI == 1
         // Communicate A back to blocks

From 5f7ad1fb409d8641f7e42bdde4f812b14b34d066 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Mon, 22 Nov 2021 09:45:43 -0500
Subject: [PATCH 06/33] Three more guards

---
 src/helpme.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/helpme.h b/src/helpme.h
index 3fa077b..d0dcab3 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -1978,7 +1978,9 @@ class PMEInstance {
 
         // B transform
         size_t numCA = (size_t)subsetOfCAlongB_ * myComplexGridDimensionA_;
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads_)
+#endif
         for (size_t ca = 0; ca < numCA; ++ca) {
             fftHelperB_.transform(buffer1 + ca * gridDimensionB_, FFTW_FORWARD);
         }
@@ -2026,7 +2028,9 @@ class PMEInstance {
 #endif
         // C transform
         size_t numBA = (size_t)subsetOfBAlongC_ * myComplexGridDimensionA_;
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads_)
+#endif
         for (size_t ba = 0; ba < numBA; ++ba) {
             fftHelperC_.transform(buffer2 + ba * gridDimensionC_, FFTW_FORWARD);
         }
@@ -2053,7 +2057,9 @@ class PMEInstance {
 
         // C transform
         size_t numYX = (size_t)subsetOfBAlongC_ * myComplexGridDimensionA_;
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads_)
+#endif
         for (size_t yx = 0; yx < numYX; ++yx) {
             fftHelperC_.transform(convolvedGrid + yx * gridDimensionC_, FFTW_BACKWARD);
         }

From fe0622e5354edc8ee40eca24e64dba8400dbec50 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Mon, 22 Nov 2021 15:50:23 -0500
Subject: [PATCH 07/33] More pragma guards

---
 src/helpme.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/helpme.h b/src/helpme.h
index d0dcab3..4f82de8 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -2111,11 +2111,15 @@ class PMEInstance {
 
         // B transform with instant sort of local blocks from CAB -> CBA order
         size_t numCA = (size_t)subsetOfCAlongB_ * myComplexGridDimensionA_;
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads_)
+#endif
         for (size_t ca = 0; ca < numCA; ++ca) {
             fftHelperB_.transform(buffer1 + ca * gridDimensionB_, FFTW_BACKWARD);
         }
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads_)
+#endif
         for (int c = 0; c < subsetOfCAlongB_; ++c) {
             for (int a = 0; a < myComplexGridDimensionA_; ++a) {
                 int cx = c * myComplexGridDimensionA_ * gridDimensionB_ + a * gridDimensionB_;
@@ -2162,7 +2166,9 @@ class PMEInstance {
 
         // A transform
         Real *realGrid = reinterpret_cast<Real *>(buffer2);
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads_)
+#endif
         for (int cb = 0; cb < subsetOfCAlongA_ * myGridDimensionB_; ++cb) {
             fftHelperA_.transform(buffer1 + cb * complexGridDimensionA_, realGrid + cb * gridDimensionA_);
         }

From d600734f4e72607784be981ee250f02c26e27f02 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Mon, 22 Nov 2021 15:55:19 -0500
Subject: [PATCH 08/33] 2 more guards

---
 src/helpme.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/helpme.h b/src/helpme.h
index 4f82de8..c57286c 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -2281,7 +2281,9 @@ class PMEInstance {
         }
         if (iAmNodeZero) transformedGrid[0] = 0;
 // Writing the three nested loops in one allows for better load balancing in parallel.
+#ifdef _OPENMP
 #pragma omp parallel for reduction(+ : energy) num_threads(nThreads_)
+#endif
         for (size_t yxz = 0; yxz < nyxz; ++yxz) {
             energy += transformedGrid[yxz] * transformedGrid[yxz] * influenceFunction[yxz];
             transformedGrid[yxz] *= influenceFunction[yxz];
@@ -2312,7 +2314,9 @@ class PMEInstance {
         }
         if (iAmNodeZero) transformedGrid[0] = Complex(0, 0);
         const size_t numCTerms(myNumKSumTermsC_);
+#ifdef _OPENMP
 #pragma omp parallel for reduction(+ : energy) num_threads(nThreads_)
+#endif
         for (size_t yxz = 0; yxz < nyxz; ++yxz) {
             size_t xz = yxz % nxz;
             int kx = firstKSumTermA_ + xz / numCTerms;

From 3e9aae684a36537b07c0307e01c0e919d1f61417 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Mon, 22 Nov 2021 17:16:26 -0500
Subject: [PATCH 09/33] Add brackets for non openmp to be consistent with other
 code blocks

---
 src/helpme.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/helpme.h b/src/helpme.h
index c57286c..f61ffc6 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -1551,6 +1551,7 @@ class PMEInstance {
         {
             int threadID = omp_get_thread_num();
 #else
+        {
             int threadID = 0;
 #endif
             for (size_t row = threadID; row < myGridDimensionC_; row += nThreads_) {
@@ -1565,9 +1566,7 @@ class PMEInstance {
                 spreadParametersImpl(atom, realGrid, nComponents, splineA, splineB, splineC, fractionalParameters,
                                      threadID);
             }
-#ifdef _OPENMP
         }
-#endif
         return realGrid;
     }
 
@@ -1926,6 +1925,7 @@ class PMEInstance {
         {
             int threadID = omp_get_thread_num();
 #else
+        {
             int threadID = 0;
 #endif
             auto scratch = &buffer[threadID * scratchRowDim];
@@ -1944,9 +1944,7 @@ class PMEInstance {
                     }
                 }
             }
-#ifdef _OPENMP
         }
-#endif
 
 #if HAVE_MPI == 1
         // Communicate A back to blocks
@@ -2414,14 +2412,15 @@ class PMEInstance {
                 }
             }
         }
+#ifdef _OPENMP
 #pragma omp parallel num_threads(nThreads_)
         {
-#ifdef _OPENMP
             int threadID = omp_get_thread_num();
+#pragma omp for
 #else
+        {
             int threadID = 0;
 #endif
-#pragma omp for
             for (size_t atom = 0; atom < nAtoms; ++atom) {
                 const auto &cacheEntry = splineCache_[atom];
                 const auto &absAtom = cacheEntry.absoluteAtomNumber;

From 4327b35751d294a65f01a8350faf7d9c3c986b31 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Mon, 22 Nov 2021 17:21:20 -0500
Subject: [PATCH 10/33] Guard 2 more

---
 src/helpme.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/helpme.h b/src/helpme.h
index f61ffc6..daa4f0c 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -2802,7 +2802,9 @@ class PMEInstance {
         // Direct space, using simple O(N^2) algorithm.  This can be improved using a nonbonded list if needed.
         Real cutoffSquared = sphericalCutoff * sphericalCutoff;
         Real kappaSquared = kappa_ * kappa_;
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads_)
+#endif
         for (size_t i = 0; i < nAtoms; ++i) {
             const auto &coordsI = coordinates.row(i);
             Real *phiPtr = potential[i];
@@ -2834,7 +2836,9 @@ class PMEInstance {
         } else {
             std::logic_error("Unknown algorithm in helpme::computePAtAtomicSites");
         }
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads_)
+#endif
         for (size_t atom = 0; atom < nAtoms; ++atom) {
             const auto &cacheEntry = splineCache_[atom];
             const auto &absAtom = cacheEntry.absoluteAtomNumber;

From 6f85b6a0a8fb8d716d49b5185bd7b9aaf8d47723 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Tue, 23 Nov 2021 06:30:45 -0500
Subject: [PATCH 11/33] Fix signed/unsigned comparison

---
 src/matrix.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/matrix.h b/src/matrix.h
index e3f2d0a..467382c 100644
--- a/src/matrix.h
+++ b/src/matrix.h
@@ -575,10 +575,10 @@ class Matrix {
         unsortedEigenVectors.transposeInPlace();
 
         std::vector<std::pair<Real, const Real*>> eigenPairs;
-        for (int val = 0; val < nRows_; ++val) eigenPairs.push_back({eigenValues[val][0], unsortedEigenVectors[val]});
+        for (size_t val = 0; val < nRows_; ++val) eigenPairs.push_back({eigenValues[val][0], unsortedEigenVectors[val]});
         std::sort(eigenPairs.begin(), eigenPairs.end());
         if (order == SortOrder::Descending) std::reverse(eigenPairs.begin(), eigenPairs.end());
-        for (int val = 0; val < nRows_; ++val) {
+        for (size_t val = 0; val < nRows_; ++val) {
             const auto& e = eigenPairs[val];
             eigenValues.data_[val] = std::get<0>(e);
             std::copy(std::get<1>(e), std::get<1>(e) + nCols_, sortedEigenVectors[val]);

From 489ca5089aab278459b44f0b021e1c39db5f6a3e Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Tue, 23 Nov 2021 06:55:51 -0500
Subject: [PATCH 12/33] Fix signed/unsigned comparison

---
 src/helpme.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/helpme.h b/src/helpme.h
index daa4f0c..bb91de4 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -1037,7 +1037,8 @@ class PMEInstance {
         if (coordinates.nRows() != parameters.nRows())
             throw std::runtime_error(
                 "Inconsistent number of coordinates and parameters; there should be nAtoms of each.");
-        if (parameters.nCols() != (nCartesian(parameterAngMom) - cartesianOffset))
+        if (n_param_cols < 0 ||
+            parameters.nCols() != (size_t)n_param_cols)
             throw std::runtime_error(
                 "Mismatch in the number of parameters provided and the parameter angular momentum");
     }

From 65e22cffef51f1e8adad494a6904caad8b9ba014 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Tue, 23 Nov 2021 07:02:57 -0500
Subject: [PATCH 13/33] Fix signed/unsigne comparison

---
 src/helpme.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/helpme.h b/src/helpme.h
index bb91de4..266da6d 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -1595,7 +1595,7 @@ class PMEInstance {
         {
             int threadID = 0;
 #endif
-            for (size_t row = threadID; row < gridDimensionC_; row += nThreads_) {
+            for (int row = threadID; row < gridDimensionC_; row += nThreads_) {
                 gridAtomList_[row].clear();
             }
             auto &mySplineList = splinesPerThread_[threadID];

From 5e1363faf630f9759863cedea3250208e0c73dd2 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Tue, 23 Nov 2021 07:23:07 -0500
Subject: [PATCH 14/33] Use integer type and cast to integer to be consistent
 and avoid losing bits.

---
 src/helpme.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/helpme.h b/src/helpme.h
index 266da6d..5c1801d 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -1608,7 +1608,7 @@ class PMEInstance {
                               atomCoords[2] * recVecs_(2, 2) - EPS;
                 cCoord -= floor(cCoord);
                 short cStartingGridPoint = gridDimensionC_ * cCoord;
-                size_t thisAtomsThread = cStartingGridPoint % nThreads_;
+                int thisAtomsThread = (int)cStartingGridPoint % nThreads_;
                 const auto &cGridIterator = gridIteratorC_[cStartingGridPoint];
                 if (cGridIterator.size() && thisAtomsThread == threadID) {
                     Real aCoord = atomCoords[0] * recVecs_(0, 0) + atomCoords[1] * recVecs_(1, 0) +

From 4380e389d877e7acdb74c01ebd35dc8ffcc571f4 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Tue, 23 Nov 2021 07:28:24 -0500
Subject: [PATCH 15/33] Comment out currently unused variable. Need to check if
 this should be gridIteratorC_ or not.

---
 src/helpme.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/helpme.h b/src/helpme.h
index 5c1801d..6c512e7 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -1599,7 +1599,7 @@ class PMEInstance {
                 gridAtomList_[row].clear();
             }
             auto &mySplineList = splinesPerThread_[threadID];
-            const auto &gridIteratorC = threadedGridIteratorC_[threadID];
+            //const auto &gridIteratorC = threadedGridIteratorC_[threadID]; FIXME currently unused
             mySplineList.clear();
             size_t myNumAtoms = 0;
             for (int atom = 0; atom < nAtoms; ++atom) {

From c7e2e7c95a20d71c672491ef93c1166aa2d31306 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Tue, 23 Nov 2021 07:31:53 -0500
Subject: [PATCH 16/33] Fix signed/unsigned comparison

---
 src/helpme.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/helpme.h b/src/helpme.h
index 6c512e7..4c2e0f2 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -1642,7 +1642,7 @@ class PMEInstance {
         // certain scale factor to try and minimize allocations in a not-too-wasteful manner.
         if (splineCache_.size() < numCacheEntries) {
             size_t newSize = static_cast<size_t>(1.2 * numCacheEntries);
-            for (int atom = splineCache_.size(); atom < newSize; ++atom)
+            for (size_t atom = splineCache_.size(); atom < newSize; ++atom)
                 splineCache_.emplace_back(splineOrder_, splineDerivativeLevel);
         }
         std::vector<size_t> threadOffset(nThreads_, 0);

From 13f53d9c39b05fe9cf31c526390f340f106ee00d Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Tue, 23 Nov 2021 07:34:59 -0500
Subject: [PATCH 17/33] Make loop var int since all other vars are int

---
 src/helpme.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/helpme.h b/src/helpme.h
index 4c2e0f2..8dbcca1 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -1659,7 +1659,7 @@ class PMEInstance {
             int threadID = 0;
 #endif
             size_t entry = threadOffset[threadID];
-            for (size_t cRow = threadID; cRow < gridDimensionC_; cRow += nThreads_) {
+            for (int cRow = threadID; cRow < gridDimensionC_; cRow += nThreads_) {
                 for (const auto &gridPointAndAtom : gridAtomList_[cRow]) {
                     size_t atom = gridPointAndAtom.second;
                     const Real *atomCoords = coords[atom];

From 44524427140f2ea58ca36107759838e733ce4cfe Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Tue, 23 Nov 2021 07:37:26 -0500
Subject: [PATCH 18/33] Make loop var int

---
 src/helpme.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/helpme.h b/src/helpme.h
index 8dbcca1..228b400 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -1555,7 +1555,7 @@ class PMEInstance {
         {
             int threadID = 0;
 #endif
-            for (size_t row = threadID; row < myGridDimensionC_; row += nThreads_) {
+            for (int row = threadID; row < myGridDimensionC_; row += nThreads_) {
                 std::fill(&realGrid[row * numBA], &realGrid[(row + 1) * numBA], Real(0));
             }
             for (const auto &spline : splinesPerThread_[threadID]) {

From 4a7f7cfc221da2051f05d099a22920c812d95adc Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Tue, 23 Nov 2021 07:49:28 -0500
Subject: [PATCH 19/33] Protect MPI-only variable

---
 src/helpme.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/helpme.h b/src/helpme.h
index 228b400..e9ec691 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -1881,14 +1881,20 @@ class PMEInstance {
      * \return Pointer to the transformed grid, which is stored in one of the buffers in BAC order.
      */
     Complex *forwardTransform(Real *realGrid) {
+#if HAVE_MPI == 1
         Real *__restrict__ realCBA;
+#endif
         Complex *__restrict__ buffer1, *__restrict__ buffer2;
         if (realGrid == reinterpret_cast<Real *>(workSpace1_.data())) {
+#if HAVE_MPI == 1
             realCBA = reinterpret_cast<Real *>(workSpace2_.data());
+#endif
             buffer1 = workSpace2_.data();
             buffer2 = workSpace1_.data();
         } else {
+#if HAVE_MPI == 1
             realCBA = reinterpret_cast<Real *>(workSpace1_.data());
+#endif
             buffer1 = workSpace1_.data();
             buffer2 = workSpace2_.data();
         }

From 48603e525673318c19d4d88f7863dce806502f04 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Tue, 23 Nov 2021 08:33:47 -0500
Subject: [PATCH 20/33] Change int to size_t to avoid signed/unsigned
 comparison

---
 src/matrix.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/matrix.h b/src/matrix.h
index 467382c..ba47982 100644
--- a/src/matrix.h
+++ b/src/matrix.h
@@ -333,8 +333,8 @@ class Matrix {
      */
     void assertSymmetric(const Real& threshold = 1e-10f) const {
         assertSquare();
-        for (int row = 0; row < nRows_; ++row) {
-            for (int col = 0; col < row; ++col) {
+        for (size_t row = 0; row < nRows_; ++row) {
+            for (size_t col = 0; col < row; ++col) {
                 if (std::abs(data_[row * nCols_ + col] - data_[col * nCols_ + row]) > threshold)
                     throw std::runtime_error("Unexpected non-symmetric matrix found.");
             }

From c5f646af492098cb686495a51ca1b7137712e922 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Tue, 23 Nov 2021 08:40:09 -0500
Subject: [PATCH 21/33] Comment out unused var

---
 src/lapack_wrapper.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/lapack_wrapper.h b/src/lapack_wrapper.h
index fbe62d9..f6c5956 100644
--- a/src/lapack_wrapper.h
+++ b/src/lapack_wrapper.h
@@ -128,7 +128,8 @@ void JacobiCyclicDiagonalization(Real *eigenvalues, Real *eigenvectors, const Re
     Real threshold_norm;
     Real threshold;
     Real tan_phi, sin_phi, cos_phi, tan2_phi, sin2_phi, cos2_phi;
-    Real sin_2phi, cos_2phi, cot_2phi;
+    Real sin_2phi, cot_2phi;
+    //Real cos_2phi;
     Real dum1;
     Real dum2;
     Real dum3;
@@ -181,7 +182,7 @@ void JacobiCyclicDiagonalization(Real *eigenvalues, Real *eigenvectors, const Re
                 if (tan_phi < 0) sin_phi = -sin_phi;
                 cos_phi = sqrt(cos2_phi);
                 sin_2phi = 2 * sin_phi * cos_phi;
-                cos_2phi = cos2_phi - sin2_phi;
+                //cos_2phi = cos2_phi - sin2_phi;
 
                 // Rotate columns k and m for both the matrix A
                 //     and the matrix of eigenvectors.

From 78ab56686eac055e1942a141ef65cbc3b168fe46 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Tue, 23 Nov 2021 08:42:58 -0500
Subject: [PATCH 22/33] Change to size_t to avoid signed/unsigned comparison

---
 src/matrix.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/matrix.h b/src/matrix.h
index ba47982..599cf27 100644
--- a/src/matrix.h
+++ b/src/matrix.h
@@ -361,7 +361,7 @@ class Matrix {
         Matrix evecs = std::get<1>(eigenPairs);
         evalsReal.applyOperationToEachElement(function);
         Matrix evecsT = evecs.transpose();
-        for (int row = 0; row < nRows_; ++row) {
+        for (size_t row = 0; row < nRows_; ++row) {
             Real transformedEigenvalue = evalsReal[row][0];
             std::for_each(evecsT.data_ + row * nCols_, evecsT.data_ + (row + 1) * nCols_,
                           [&](Real& val) { val *= transformedEigenvalue; });

From ca0bd2e19f0e345ce0d2599474bffed17df262e2 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Tue, 23 Nov 2021 10:26:16 -0500
Subject: [PATCH 23/33] Protect against signed int comparison to unsigned
 overflow

---
 src/splines.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/splines.h b/src/splines.h
index acbe7f6..f6015d3 100644
--- a/src/splines.h
+++ b/src/splines.h
@@ -88,7 +88,10 @@ class BSpline {
         derivativeLevel_ = derivativeLevel;
 
         // The +1 is to account for the fact that we need to store entries up to and including the max.
-        if (splines_.nRows() < derivativeLevel + 1 || splines_.nCols() != order)
+        if (derivativeLevel < 0 || order < 0) {
+          throw std::runtime_error("BSpline::update: derivativeLevel and/or order < 0.");
+        }
+        if (splines_.nRows() < (size_t)derivativeLevel + 1 || splines_.nCols() != (size_t)order)
             splines_ = Matrix<Real>(derivativeLevel + 1, order);
 
         splines_.setZero();

From 3dde10e741ed845aa0562941c3fe52ce82e67d1f Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Tue, 23 Nov 2021 10:29:55 -0500
Subject: [PATCH 24/33] Fix 3 signed/unsigned comparison

---
 src/matrix.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/matrix.h b/src/matrix.h
index 599cf27..cb13439 100644
--- a/src/matrix.h
+++ b/src/matrix.h
@@ -397,10 +397,10 @@ class Matrix {
             throw std::runtime_error("Attempting to multiply matrices with incompatible dimensions.");
         Matrix product(nRows_, other.nCols_);
         Real* output = product.data_;
-        for (int row = 0; row < nRows_; ++row) {
+        for (size_t row = 0; row < nRows_; ++row) {
             const Real* rowPtr = data_ + row * nCols_;
-            for (int col = 0; col < other.nCols_; ++col) {
-                for (int link = 0; link < nCols_; ++link) {
+            for (size_t col = 0; col < other.nCols_; ++col) {
+                for (size_t link = 0; link < nCols_; ++link) {
                     *output += rowPtr[link] * other.data_[link * other.nCols_ + col];
                 }
                 ++output;

From 8ceaae954f5d2108a8700c9b67055c23647a6092 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Tue, 23 Nov 2021 10:35:57 -0500
Subject: [PATCH 25/33] Init uninitialized var

---
 src/helpme.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/helpme.h b/src/helpme.h
index e9ec691..c55d94f 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -2965,7 +2965,7 @@ class PMEInstance {
 
         filterAtomsAndBuildSplineCache(parameterAngMom, coordinates);
         auto realGrid = spreadParameters(parameterAngMom, parameters);
-        Real energy;
+        Real energy = 0;
         if (algorithmType_ == AlgorithmType::PME) {
             auto gridAddress = forwardTransform(realGrid);
             energy = convolveE(gridAddress);

From 51c65a7038af806103506b6608fe7ac1a54f2143 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Tue, 23 Nov 2021 13:13:46 -0500
Subject: [PATCH 26/33] Add missing variable declaration

---
 src/helpme.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/helpme.h b/src/helpme.h
index c55d94f..7a54134 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -1037,6 +1037,7 @@ class PMEInstance {
         if (coordinates.nRows() != parameters.nRows())
             throw std::runtime_error(
                 "Inconsistent number of coordinates and parameters; there should be nAtoms of each.");
+        int n_param_cols = nCartesian(parameterAngMom) - cartesianOffset;
         if (n_param_cols < 0 ||
             parameters.nCols() != (size_t)n_param_cols)
             throw std::runtime_error(

From 8618fbd7e235ff2f19777bc79e92de40189c2c53 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Wed, 24 Nov 2021 09:28:48 -0500
Subject: [PATCH 27/33] Fix signed/unsigned comparison

---
 src/helpme.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/helpme.h b/src/helpme.h
index 7a54134..b195658 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -1603,7 +1603,7 @@ class PMEInstance {
             //const auto &gridIteratorC = threadedGridIteratorC_[threadID]; FIXME currently unused
             mySplineList.clear();
             size_t myNumAtoms = 0;
-            for (int atom = 0; atom < nAtoms; ++atom) {
+            for (size_t atom = 0; atom < nAtoms; ++atom) {
                 const Real *atomCoords = coords[atom];
                 Real cCoord = atomCoords[0] * recVecs_(0, 2) + atomCoords[1] * recVecs_(1, 2) +
                               atomCoords[2] * recVecs_(2, 2) - EPS;

From 04ac60153b0f77b0167ae09fd64ae6911cf042f8 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Wed, 24 Nov 2021 09:29:09 -0500
Subject: [PATCH 28/33] Update the standalone include file

---
 single_include/helpme_standalone.h | 106 +++++++++++++++++++++--------
 1 file changed, 77 insertions(+), 29 deletions(-)

diff --git a/single_include/helpme_standalone.h b/single_include/helpme_standalone.h
index 8e57c01..acb8f5d 100644
--- a/single_include/helpme_standalone.h
+++ b/single_include/helpme_standalone.h
@@ -210,7 +210,8 @@ void JacobiCyclicDiagonalization(Real *eigenvalues, Real *eigenvectors, const Re
     Real threshold_norm;
     Real threshold;
     Real tan_phi, sin_phi, cos_phi, tan2_phi, sin2_phi, cos2_phi;
-    Real sin_2phi, cos_2phi, cot_2phi;
+    Real sin_2phi, cot_2phi;
+    //Real cos_2phi;
     Real dum1;
     Real dum2;
     Real dum3;
@@ -263,7 +264,7 @@ void JacobiCyclicDiagonalization(Real *eigenvalues, Real *eigenvectors, const Re
                 if (tan_phi < 0) sin_phi = -sin_phi;
                 cos_phi = sqrt(cos2_phi);
                 sin_2phi = 2 * sin_phi * cos_phi;
-                cos_2phi = cos2_phi - sin2_phi;
+                //cos_2phi = cos2_phi - sin2_phi;
 
                 // Rotate columns k and m for both the matrix A
                 //     and the matrix of eigenvectors.
@@ -793,8 +794,8 @@ class Matrix {
      */
     void assertSymmetric(const Real& threshold = 1e-10f) const {
         assertSquare();
-        for (int row = 0; row < nRows_; ++row) {
-            for (int col = 0; col < row; ++col) {
+        for (size_t row = 0; row < nRows_; ++row) {
+            for (size_t col = 0; col < row; ++col) {
                 if (std::abs(data_[row * nCols_ + col] - data_[col * nCols_ + row]) > threshold)
                     throw std::runtime_error("Unexpected non-symmetric matrix found.");
             }
@@ -821,7 +822,7 @@ class Matrix {
         Matrix evecs = std::get<1>(eigenPairs);
         evalsReal.applyOperationToEachElement(function);
         Matrix evecsT = evecs.transpose();
-        for (int row = 0; row < nRows_; ++row) {
+        for (size_t row = 0; row < nRows_; ++row) {
             Real transformedEigenvalue = evalsReal[row][0];
             std::for_each(evecsT.data_ + row * nCols_, evecsT.data_ + (row + 1) * nCols_,
                           [&](Real& val) { val *= transformedEigenvalue; });
@@ -857,10 +858,10 @@ class Matrix {
             throw std::runtime_error("Attempting to multiply matrices with incompatible dimensions.");
         Matrix product(nRows_, other.nCols_);
         Real* output = product.data_;
-        for (int row = 0; row < nRows_; ++row) {
+        for (size_t row = 0; row < nRows_; ++row) {
             const Real* rowPtr = data_ + row * nCols_;
-            for (int col = 0; col < other.nCols_; ++col) {
-                for (int link = 0; link < nCols_; ++link) {
+            for (size_t col = 0; col < other.nCols_; ++col) {
+                for (size_t link = 0; link < nCols_; ++link) {
                     *output += rowPtr[link] * other.data_[link * other.nCols_ + col];
                 }
                 ++output;
@@ -1035,10 +1036,10 @@ class Matrix {
         unsortedEigenVectors.transposeInPlace();
 
         std::vector<std::pair<Real, const Real*>> eigenPairs;
-        for (int val = 0; val < nRows_; ++val) eigenPairs.push_back({eigenValues[val][0], unsortedEigenVectors[val]});
+        for (size_t val = 0; val < nRows_; ++val) eigenPairs.push_back({eigenValues[val][0], unsortedEigenVectors[val]});
         std::sort(eigenPairs.begin(), eigenPairs.end());
         if (order == SortOrder::Descending) std::reverse(eigenPairs.begin(), eigenPairs.end());
-        for (int val = 0; val < nRows_; ++val) {
+        for (size_t val = 0; val < nRows_; ++val) {
             const auto& e = eigenPairs[val];
             eigenValues.data_[val] = std::get<0>(e);
             std::copy(std::get<1>(e), std::get<1>(e) + nCols_, sortedEigenVectors[val]);
@@ -2263,7 +2264,10 @@ class BSpline {
         derivativeLevel_ = derivativeLevel;
 
         // The +1 is to account for the fact that we need to store entries up to and including the max.
-        if (splines_.nRows() < derivativeLevel + 1 || splines_.nCols() != order)
+        if (derivativeLevel < 0 || order < 0) {
+          throw std::runtime_error("BSpline::update: derivativeLevel and/or order < 0.");
+        }
+        if (splines_.nRows() < (size_t)derivativeLevel + 1 || splines_.nCols() != (size_t)order)
             splines_ = Matrix<Real>(derivativeLevel + 1, order);
 
         splines_.setZero();
@@ -2378,7 +2382,9 @@ namespace helpme {
 template <typename Real>
 void permuteABCtoCBA(Real const *__restrict__ abcPtr, int const aDimension, int const bDimension, int const cDimension,
                      Real *__restrict__ cbaPtr, size_t nThreads = 1) {
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads)
+#endif
     for (int C = 0; C <= -1 + cDimension; ++C)
         for (int B = 0; B <= -1 + bDimension; ++B)
             for (int A = 0; A <= -1 + aDimension; ++A)
@@ -2398,7 +2404,9 @@ void permuteABCtoCBA(Real const *__restrict__ abcPtr, int const aDimension, int
 template <typename Real>
 void permuteABCtoACB(Real const *__restrict__ abcPtr, int const aDimension, int const bDimension, int const cDimension,
                      Real *__restrict__ acbPtr, size_t nThreads = 1) {
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads)
+#endif
     for (int A = 0; A <= -1 + aDimension; ++A)
         for (int C = 0; C <= -1 + cDimension; ++C)
             for (int B = 0; B <= -1 + bDimension; ++B)
@@ -3233,7 +3241,9 @@ class PMEInstance {
         // Exclude m=0 cell.
         int start = (nodeZero ? 1 : 0);
 // Writing the three nested loops in one allows for better load balancing in parallel.
+#ifdef _OPENMP
 #pragma omp parallel for reduction(+ : energy, Vxx, Vxy, Vyy, Vxz, Vyz, Vzz) num_threads(nThreads)
+#endif
         for (size_t yxz = start; yxz < nyxz; ++yxz) {
             size_t xz = yxz % nxz;
             short ky = yxz / nxz;
@@ -3340,7 +3350,9 @@ class PMEInstance {
         // Exclude m=0 cell.
         int start = (nodeZero ? 1 : 0);
 // Writing the three nested loops in one allows for better load balancing in parallel.
+#ifdef _OPENMP
 #pragma omp parallel for reduction(+ : energy, Vxx, Vxy, Vyy, Vxz, Vyz, Vzz) num_threads(nThreads)
+#endif
         for (size_t yxz = start; yxz < nyxz; ++yxz) {
             size_t xz = yxz % nxz;
             short ky = yxz / nxz;
@@ -3449,7 +3461,9 @@ class PMEInstance {
         if (coordinates.nRows() != parameters.nRows())
             throw std::runtime_error(
                 "Inconsistent number of coordinates and parameters; there should be nAtoms of each.");
-        if (parameters.nCols() != (nCartesian(parameterAngMom) - cartesianOffset))
+        int n_param_cols = nCartesian(parameterAngMom) - cartesianOffset;
+        if (n_param_cols < 0 ||
+            parameters.nCols() != (size_t)n_param_cols)
             throw std::runtime_error(
                 "Mismatch in the number of parameters provided and the parameter angular momentum");
     }
@@ -3499,7 +3513,9 @@ class PMEInstance {
         // Exclude m=0 cell.
         int start = (nodeZero ? 1 : 0);
 // Writing the three nested loops in one allows for better load balancing in parallel.
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads)
+#endif
         for (size_t yxz = start; yxz < nyxz; ++yxz) {
             size_t xz = yxz % nxz;
             short ky = yxz / nxz;
@@ -3956,14 +3972,15 @@ class PMEInstance {
 
         int nComponents = nCartesian(parameterAngMom);
         size_t numBA = (size_t)myGridDimensionB_ * myGridDimensionA_;
+#ifdef _OPENMP
 #pragma omp parallel num_threads(nThreads_)
         {
-#ifdef _OPENMP
             int threadID = omp_get_thread_num();
 #else
+        {
             int threadID = 0;
 #endif
-            for (size_t row = threadID; row < myGridDimensionC_; row += nThreads_) {
+            for (int row = threadID; row < myGridDimensionC_; row += nThreads_) {
                 std::fill(&realGrid[row * numBA], &realGrid[(row + 1) * numBA], Real(0));
             }
             for (const auto &spline : splinesPerThread_[threadID]) {
@@ -3995,27 +4012,28 @@ class PMEInstance {
         gridAtomList_.resize(gridDimensionC_);
 
 // Classify atoms to their worker threads first, then construct splines for each thread
+#ifdef _OPENMP
 #pragma omp parallel num_threads(nThreads_)
         {
-#ifdef _OPENMP
             int threadID = omp_get_thread_num();
 #else
+        {
             int threadID = 0;
 #endif
-            for (size_t row = threadID; row < gridDimensionC_; row += nThreads_) {
+            for (int row = threadID; row < gridDimensionC_; row += nThreads_) {
                 gridAtomList_[row].clear();
             }
             auto &mySplineList = splinesPerThread_[threadID];
-            const auto &gridIteratorC = threadedGridIteratorC_[threadID];
+            //const auto &gridIteratorC = threadedGridIteratorC_[threadID]; FIXME currently unused
             mySplineList.clear();
             size_t myNumAtoms = 0;
-            for (int atom = 0; atom < nAtoms; ++atom) {
+            for (size_t atom = 0; atom < nAtoms; ++atom) {
                 const Real *atomCoords = coords[atom];
                 Real cCoord = atomCoords[0] * recVecs_(0, 2) + atomCoords[1] * recVecs_(1, 2) +
                               atomCoords[2] * recVecs_(2, 2) - EPS;
                 cCoord -= floor(cCoord);
                 short cStartingGridPoint = gridDimensionC_ * cCoord;
-                size_t thisAtomsThread = cStartingGridPoint % nThreads_;
+                int thisAtomsThread = (int)cStartingGridPoint % nThreads_;
                 const auto &cGridIterator = gridIteratorC_[cStartingGridPoint];
                 if (cGridIterator.size() && thisAtomsThread == threadID) {
                     Real aCoord = atomCoords[0] * recVecs_(0, 0) + atomCoords[1] * recVecs_(1, 0) +
@@ -4039,7 +4057,6 @@ class PMEInstance {
             }
             numAtomsPerThread_[threadID] = myNumAtoms;
         }
-
         // We could intervene here and do some load balancing by inspecting the list.  Currently
         // the lazy approach of just assuming that the atoms are evenly distributed along c is used.
 
@@ -4050,7 +4067,7 @@ class PMEInstance {
         // certain scale factor to try and minimize allocations in a not-too-wasteful manner.
         if (splineCache_.size() < numCacheEntries) {
             size_t newSize = static_cast<size_t>(1.2 * numCacheEntries);
-            for (int atom = splineCache_.size(); atom < newSize; ++atom)
+            for (size_t atom = splineCache_.size(); atom < newSize; ++atom)
                 splineCache_.emplace_back(splineOrder_, splineDerivativeLevel);
         }
         std::vector<size_t> threadOffset(nThreads_, 0);
@@ -4058,15 +4075,16 @@ class PMEInstance {
             threadOffset[thread] = threadOffset[thread - 1] + numAtomsPerThread_[thread - 1];
         }
 
+#ifdef _OPENMP
 #pragma omp parallel num_threads(nThreads_)
         {
-#ifdef _OPENMP
             int threadID = omp_get_thread_num();
 #else
+        {
             int threadID = 0;
 #endif
             size_t entry = threadOffset[threadID];
-            for (size_t cRow = threadID; cRow < gridDimensionC_; cRow += nThreads_) {
+            for (int cRow = threadID; cRow < gridDimensionC_; cRow += nThreads_) {
                 for (const auto &gridPointAndAtom : gridAtomList_[cRow]) {
                     size_t atom = gridPointAndAtom.second;
                     const Real *atomCoords = coords[atom];
@@ -4094,13 +4112,13 @@ class PMEInstance {
                 }
             }
         }
-
 // Finally, find all of the splines that this thread will need to handle
+#ifdef _OPENMP
 #pragma omp parallel num_threads(nThreads_)
         {
-#ifdef _OPENMP
             int threadID = omp_get_thread_num();
 #else
+        {
             int threadID = 0;
 #endif
             auto &mySplineList = splinesPerThread_[threadID];
@@ -4288,14 +4306,20 @@ class PMEInstance {
      * \return Pointer to the transformed grid, which is stored in one of the buffers in BAC order.
      */
     Complex *forwardTransform(Real *realGrid) {
+#if HAVE_MPI == 1
         Real *__restrict__ realCBA;
+#endif
         Complex *__restrict__ buffer1, *__restrict__ buffer2;
         if (realGrid == reinterpret_cast<Real *>(workSpace1_.data())) {
+#if HAVE_MPI == 1
             realCBA = reinterpret_cast<Real *>(workSpace2_.data());
+#endif
             buffer1 = workSpace2_.data();
             buffer2 = workSpace1_.data();
         } else {
+#if HAVE_MPI == 1
             realCBA = reinterpret_cast<Real *>(workSpace1_.data());
+#endif
             buffer1 = workSpace1_.data();
             buffer2 = workSpace2_.data();
         }
@@ -4328,15 +4352,18 @@ class PMEInstance {
         helpme::vector<Complex> buffer(nThreads_ * scratchRowDim);
 
 // A transform, with instant sort to CAB ordering for each local block
+#ifdef _OPENMP
 #pragma omp parallel num_threads(nThreads_)
         {
-#ifdef _OPENMP
             int threadID = omp_get_thread_num();
 #else
+        {
             int threadID = 0;
 #endif
             auto scratch = &buffer[threadID * scratchRowDim];
+#ifdef _OPENMP
 #pragma omp for
+#endif
             for (int c = 0; c < subsetOfCAlongA_; ++c) {
                 for (int b = 0; b < myGridDimensionB_; ++b) {
                     Real *gridPtr = realGrid + c * myGridDimensionB_ * gridDimensionA_ + b * gridDimensionA_;
@@ -4381,7 +4408,9 @@ class PMEInstance {
 
         // B transform
         size_t numCA = (size_t)subsetOfCAlongB_ * myComplexGridDimensionA_;
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads_)
+#endif
         for (size_t ca = 0; ca < numCA; ++ca) {
             fftHelperB_.transform(buffer1 + ca * gridDimensionB_, FFTW_FORWARD);
         }
@@ -4429,7 +4458,9 @@ class PMEInstance {
 #endif
         // C transform
         size_t numBA = (size_t)subsetOfBAlongC_ * myComplexGridDimensionA_;
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads_)
+#endif
         for (size_t ba = 0; ba < numBA; ++ba) {
             fftHelperC_.transform(buffer2 + ba * gridDimensionC_, FFTW_FORWARD);
         }
@@ -4456,7 +4487,9 @@ class PMEInstance {
 
         // C transform
         size_t numYX = (size_t)subsetOfBAlongC_ * myComplexGridDimensionA_;
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads_)
+#endif
         for (size_t yx = 0; yx < numYX; ++yx) {
             fftHelperC_.transform(convolvedGrid + yx * gridDimensionC_, FFTW_BACKWARD);
         }
@@ -4508,11 +4541,15 @@ class PMEInstance {
 
         // B transform with instant sort of local blocks from CAB -> CBA order
         size_t numCA = (size_t)subsetOfCAlongB_ * myComplexGridDimensionA_;
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads_)
+#endif
         for (size_t ca = 0; ca < numCA; ++ca) {
             fftHelperB_.transform(buffer1 + ca * gridDimensionB_, FFTW_BACKWARD);
         }
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads_)
+#endif
         for (int c = 0; c < subsetOfCAlongB_; ++c) {
             for (int a = 0; a < myComplexGridDimensionA_; ++a) {
                 int cx = c * myComplexGridDimensionA_ * gridDimensionB_ + a * gridDimensionB_;
@@ -4559,7 +4596,9 @@ class PMEInstance {
 
         // A transform
         Real *realGrid = reinterpret_cast<Real *>(buffer2);
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads_)
+#endif
         for (int cb = 0; cb < subsetOfCAlongA_ * myGridDimensionB_; ++cb) {
             fftHelperA_.transform(buffer1 + cb * complexGridDimensionA_, realGrid + cb * gridDimensionA_);
         }
@@ -4672,7 +4711,9 @@ class PMEInstance {
         }
         if (iAmNodeZero) transformedGrid[0] = 0;
 // Writing the three nested loops in one allows for better load balancing in parallel.
+#ifdef _OPENMP
 #pragma omp parallel for reduction(+ : energy) num_threads(nThreads_)
+#endif
         for (size_t yxz = 0; yxz < nyxz; ++yxz) {
             energy += transformedGrid[yxz] * transformedGrid[yxz] * influenceFunction[yxz];
             transformedGrid[yxz] *= influenceFunction[yxz];
@@ -4703,7 +4744,9 @@ class PMEInstance {
         }
         if (iAmNodeZero) transformedGrid[0] = Complex(0, 0);
         const size_t numCTerms(myNumKSumTermsC_);
+#ifdef _OPENMP
 #pragma omp parallel for reduction(+ : energy) num_threads(nThreads_)
+#endif
         for (size_t yxz = 0; yxz < nyxz; ++yxz) {
             size_t xz = yxz % nxz;
             int kx = firstKSumTermA_ + xz / numCTerms;
@@ -4801,14 +4844,15 @@ class PMEInstance {
                 }
             }
         }
+#ifdef _OPENMP
 #pragma omp parallel num_threads(nThreads_)
         {
-#ifdef _OPENMP
             int threadID = omp_get_thread_num();
+#pragma omp for
 #else
+        {
             int threadID = 0;
 #endif
-#pragma omp for
             for (size_t atom = 0; atom < nAtoms; ++atom) {
                 const auto &cacheEntry = splineCache_[atom];
                 const auto &absAtom = cacheEntry.absoluteAtomNumber;
@@ -5190,7 +5234,9 @@ class PMEInstance {
         // Direct space, using simple O(N^2) algorithm.  This can be improved using a nonbonded list if needed.
         Real cutoffSquared = sphericalCutoff * sphericalCutoff;
         Real kappaSquared = kappa_ * kappa_;
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads_)
+#endif
         for (size_t i = 0; i < nAtoms; ++i) {
             const auto &coordsI = coordinates.row(i);
             Real *phiPtr = potential[i];
@@ -5222,7 +5268,9 @@ class PMEInstance {
         } else {
             std::logic_error("Unknown algorithm in helpme::computePAtAtomicSites");
         }
+#ifdef _OPENMP
 #pragma omp parallel for num_threads(nThreads_)
+#endif
         for (size_t atom = 0; atom < nAtoms; ++atom) {
             const auto &cacheEntry = splineCache_[atom];
             const auto &absAtom = cacheEntry.absoluteAtomNumber;
@@ -5342,7 +5390,7 @@ class PMEInstance {
 
         filterAtomsAndBuildSplineCache(parameterAngMom, coordinates);
         auto realGrid = spreadParameters(parameterAngMom, parameters);
-        Real energy;
+        Real energy = 0;
         if (algorithmType_ == AlgorithmType::PME) {
             auto gridAddress = forwardTransform(realGrid);
             energy = convolveE(gridAddress);

From d2e37c6bb53540c53fcce29901d27f9f5db2360a Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Wed, 24 Nov 2021 09:37:45 -0500
Subject: [PATCH 29/33] Init uninitialized var in Matrix constructor

---
 src/matrix.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/matrix.h b/src/matrix.h
index cb13439..f57234b 100644
--- a/src/matrix.h
+++ b/src/matrix.h
@@ -185,7 +185,7 @@ class Matrix {
     /*!
      * \brief Matrix Constructs an empty matrix.
      */
-    Matrix() : nRows_(0), nCols_(0) {}
+    Matrix() : nRows_(0), nCols_(0), data_(0) {}
 
     /*!
      * \brief Matrix Constructs a new matrix, allocating memory.

From 46bb70360973025fa104e60e1118d170c392b07d Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Wed, 24 Nov 2021 09:46:15 -0500
Subject: [PATCH 30/33] Init uninitialized var

---
 src/helpme.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/helpme.h b/src/helpme.h
index b195658..a00261d 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -451,7 +451,7 @@ class PMEInstance {
             }
         }
 
-        Real *potentialGrid;
+        Real *potentialGrid = 0;
         if (algorithmType_ == AlgorithmType::PME) {
             auto gridAddress = forwardTransform(realGrid);
             if (virial.nRows() == 0 && virial.nCols() == 0) {

From f1b3c4f1d0258b9f1c785de0f4c2dfff299244e9 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Wed, 24 Nov 2021 09:46:24 -0500
Subject: [PATCH 31/33] Update standalone include

---
 single_include/helpme_standalone.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/single_include/helpme_standalone.h b/single_include/helpme_standalone.h
index acb8f5d..56edcd5 100644
--- a/single_include/helpme_standalone.h
+++ b/single_include/helpme_standalone.h
@@ -646,7 +646,7 @@ class Matrix {
     /*!
      * \brief Matrix Constructs an empty matrix.
      */
-    Matrix() : nRows_(0), nCols_(0) {}
+    Matrix() : nRows_(0), nCols_(0), data_(0) {}
 
     /*!
      * \brief Matrix Constructs a new matrix, allocating memory.
@@ -2875,7 +2875,7 @@ class PMEInstance {
             }
         }
 
-        Real *potentialGrid;
+        Real *potentialGrid = 0;
         if (algorithmType_ == AlgorithmType::PME) {
             auto gridAddress = forwardTransform(realGrid);
             if (virial.nRows() == 0 && virial.nCols() == 0) {

From 64bede2d626571651910b0b4ddff3180305afaf0 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Wed, 24 Nov 2021 10:16:32 -0500
Subject: [PATCH 32/33] Try to fix linting issues

---
 src/helpme.h         | 5 ++---
 src/lapack_wrapper.h | 4 ++--
 src/matrix.h         | 3 ++-
 src/splines.h        | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/helpme.h b/src/helpme.h
index a00261d..b33eb53 100644
--- a/src/helpme.h
+++ b/src/helpme.h
@@ -1038,8 +1038,7 @@ class PMEInstance {
             throw std::runtime_error(
                 "Inconsistent number of coordinates and parameters; there should be nAtoms of each.");
         int n_param_cols = nCartesian(parameterAngMom) - cartesianOffset;
-        if (n_param_cols < 0 ||
-            parameters.nCols() != (size_t)n_param_cols)
+        if (n_param_cols < 0 || parameters.nCols() != (size_t)n_param_cols)
             throw std::runtime_error(
                 "Mismatch in the number of parameters provided and the parameter angular momentum");
     }
@@ -1600,7 +1599,7 @@ class PMEInstance {
                 gridAtomList_[row].clear();
             }
             auto &mySplineList = splinesPerThread_[threadID];
-            //const auto &gridIteratorC = threadedGridIteratorC_[threadID]; FIXME currently unused
+            // const auto &gridIteratorC = threadedGridIteratorC_[threadID]; FIXME currently unused
             mySplineList.clear();
             size_t myNumAtoms = 0;
             for (size_t atom = 0; atom < nAtoms; ++atom) {
diff --git a/src/lapack_wrapper.h b/src/lapack_wrapper.h
index f6c5956..c2abe12 100644
--- a/src/lapack_wrapper.h
+++ b/src/lapack_wrapper.h
@@ -129,7 +129,7 @@ void JacobiCyclicDiagonalization(Real *eigenvalues, Real *eigenvectors, const Re
     Real threshold;
     Real tan_phi, sin_phi, cos_phi, tan2_phi, sin2_phi, cos2_phi;
     Real sin_2phi, cot_2phi;
-    //Real cos_2phi;
+    // Real cos_2phi;
     Real dum1;
     Real dum2;
     Real dum3;
@@ -182,7 +182,7 @@ void JacobiCyclicDiagonalization(Real *eigenvalues, Real *eigenvectors, const Re
                 if (tan_phi < 0) sin_phi = -sin_phi;
                 cos_phi = sqrt(cos2_phi);
                 sin_2phi = 2 * sin_phi * cos_phi;
-                //cos_2phi = cos2_phi - sin2_phi;
+                // cos_2phi = cos2_phi - sin2_phi;
 
                 // Rotate columns k and m for both the matrix A
                 //     and the matrix of eigenvectors.
diff --git a/src/matrix.h b/src/matrix.h
index f57234b..2c85088 100644
--- a/src/matrix.h
+++ b/src/matrix.h
@@ -575,7 +575,8 @@ class Matrix {
         unsortedEigenVectors.transposeInPlace();
 
         std::vector<std::pair<Real, const Real*>> eigenPairs;
-        for (size_t val = 0; val < nRows_; ++val) eigenPairs.push_back({eigenValues[val][0], unsortedEigenVectors[val]});
+        for (size_t val = 0; val < nRows_; ++val)
+            eigenPairs.push_back({eigenValues[val][0], unsortedEigenVectors[val]});
         std::sort(eigenPairs.begin(), eigenPairs.end());
         if (order == SortOrder::Descending) std::reverse(eigenPairs.begin(), eigenPairs.end());
         for (size_t val = 0; val < nRows_; ++val) {
diff --git a/src/splines.h b/src/splines.h
index f6015d3..2b3c1a8 100644
--- a/src/splines.h
+++ b/src/splines.h
@@ -89,7 +89,7 @@ class BSpline {
 
         // The +1 is to account for the fact that we need to store entries up to and including the max.
         if (derivativeLevel < 0 || order < 0) {
-          throw std::runtime_error("BSpline::update: derivativeLevel and/or order < 0.");
+            throw std::runtime_error("BSpline::update: derivativeLevel and/or order < 0.");
         }
         if (splines_.nRows() < (size_t)derivativeLevel + 1 || splines_.nCols() != (size_t)order)
             splines_ = Matrix<Real>(derivativeLevel + 1, order);

From 42c686f6ee80182580ef861dd3b4bc90f056dad3 Mon Sep 17 00:00:00 2001
From: "Daniel R. Roe" <daniel.r.roe@gmail.com>
Date: Wed, 24 Nov 2021 10:27:54 -0500
Subject: [PATCH 33/33] Fix linting issues in the single include

---
 single_include/helpme_standalone.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/single_include/helpme_standalone.h b/single_include/helpme_standalone.h
index 56edcd5..dd02231 100644
--- a/single_include/helpme_standalone.h
+++ b/single_include/helpme_standalone.h
@@ -211,7 +211,7 @@ void JacobiCyclicDiagonalization(Real *eigenvalues, Real *eigenvectors, const Re
     Real threshold;
     Real tan_phi, sin_phi, cos_phi, tan2_phi, sin2_phi, cos2_phi;
     Real sin_2phi, cot_2phi;
-    //Real cos_2phi;
+    // Real cos_2phi;
     Real dum1;
     Real dum2;
     Real dum3;
@@ -264,7 +264,7 @@ void JacobiCyclicDiagonalization(Real *eigenvalues, Real *eigenvectors, const Re
                 if (tan_phi < 0) sin_phi = -sin_phi;
                 cos_phi = sqrt(cos2_phi);
                 sin_2phi = 2 * sin_phi * cos_phi;
-                //cos_2phi = cos2_phi - sin2_phi;
+                // cos_2phi = cos2_phi - sin2_phi;
 
                 // Rotate columns k and m for both the matrix A
                 //     and the matrix of eigenvectors.
@@ -1036,7 +1036,8 @@ class Matrix {
         unsortedEigenVectors.transposeInPlace();
 
         std::vector<std::pair<Real, const Real*>> eigenPairs;
-        for (size_t val = 0; val < nRows_; ++val) eigenPairs.push_back({eigenValues[val][0], unsortedEigenVectors[val]});
+        for (size_t val = 0; val < nRows_; ++val)
+            eigenPairs.push_back({eigenValues[val][0], unsortedEigenVectors[val]});
         std::sort(eigenPairs.begin(), eigenPairs.end());
         if (order == SortOrder::Descending) std::reverse(eigenPairs.begin(), eigenPairs.end());
         for (size_t val = 0; val < nRows_; ++val) {
@@ -2265,7 +2266,7 @@ class BSpline {
 
         // The +1 is to account for the fact that we need to store entries up to and including the max.
         if (derivativeLevel < 0 || order < 0) {
-          throw std::runtime_error("BSpline::update: derivativeLevel and/or order < 0.");
+            throw std::runtime_error("BSpline::update: derivativeLevel and/or order < 0.");
         }
         if (splines_.nRows() < (size_t)derivativeLevel + 1 || splines_.nCols() != (size_t)order)
             splines_ = Matrix<Real>(derivativeLevel + 1, order);
@@ -3462,8 +3463,7 @@ class PMEInstance {
             throw std::runtime_error(
                 "Inconsistent number of coordinates and parameters; there should be nAtoms of each.");
         int n_param_cols = nCartesian(parameterAngMom) - cartesianOffset;
-        if (n_param_cols < 0 ||
-            parameters.nCols() != (size_t)n_param_cols)
+        if (n_param_cols < 0 || parameters.nCols() != (size_t)n_param_cols)
             throw std::runtime_error(
                 "Mismatch in the number of parameters provided and the parameter angular momentum");
     }
@@ -4024,7 +4024,7 @@ class PMEInstance {
                 gridAtomList_[row].clear();
             }
             auto &mySplineList = splinesPerThread_[threadID];
-            //const auto &gridIteratorC = threadedGridIteratorC_[threadID]; FIXME currently unused
+            // const auto &gridIteratorC = threadedGridIteratorC_[threadID]; FIXME currently unused
             mySplineList.clear();
             size_t myNumAtoms = 0;
             for (size_t atom = 0; atom < nAtoms; ++atom) {