From a8faee9b782fe3c248378a82d234464449eea80e Mon Sep 17 00:00:00 2001 From: amnp95 Date: Tue, 10 Sep 2024 18:53:25 -0700 Subject: [PATCH 01/17] Vectorizing the finding of pairs in a single processor --- shakermaker/shakermaker.py | 668 ++++++++++++++++++++++++++++++++----- 1 file changed, 583 insertions(+), 85 deletions(-) diff --git a/shakermaker/shakermaker.py b/shakermaker/shakermaker.py index 8dbe7e5..9574594 100644 --- a/shakermaker/shakermaker.py +++ b/shakermaker/shakermaker.py @@ -9,6 +9,7 @@ import imp import traceback from time import perf_counter +from tqdm import tqdm try: @@ -1561,6 +1562,9 @@ def gen_greens_function_database_pairs(self, showProgress=True, store_here=None, npairs_max=100000, + using_vectorize_manner=False, + cfactor = 0.5, + longer_version=True ): """Run the simulation. @@ -1610,109 +1614,603 @@ def gen_greens_function_database_pairs(self, .format(self._source.nsources, self._receivers.nstations, self._source.nsources*self._receivers.nstations, dt, nfft)) - ipair = 0 - - npairs = self._receivers.nstations*len(self._source._pslist) - - print(f"{npairs=}") + if rank == 0: + print("SahkerMaker.run - starting") + print(f"Number of sources: {self._source.nsources}") + print(f"Number of receivers: {self._receivers.nstations}") + print(f"Total src-rcv pairs: {self._source.nsources*self._receivers.nstations}") + print(f"{dt=}") + print(f"{nfft=}") - # dists = np.zeros((npairs, 2)) - pairs_to_compute = np.empty((npairs_max, 2), dtype=np.int32) - dd_of_pairs = np.empty(npairs_max, dtype=np.double) - dh_of_pairs = np.empty(npairs_max, dtype=np.double) - dv_of_pairs = np.empty(npairs_max, dtype=np.double) - zrec_of_pairs = np.empty(npairs_max, dtype=np.double) - zsrc_of_pairs = np.empty(npairs_max, dtype=np.double) + factor = cfactor + - # Initialize the counter for the number of computed pairs. 
- n_computed_pairs = 0 + - # def lor(a,b,c): - # return np.logical_or(a,np.logical_or(b,c)) def lor(a, b, c): return a | b | c + # wait for all ranks to reach here - for i_station, station in enumerate(self._receivers): + toatalnumberofpairs = self._receivers.nstations*self._source.nsources + # parst the indcies from zero to totalnumberofpairs into nprocs parts + # and assign each part to a rank + # the rank 0 will have the first part, rank 1 will have the second part and so on + istart = rank * toatalnumberofpairs // nprocs + iend = (rank + 1) * toatalnumberofpairs // nprocs + if rank == 0: + istartList = [] + iendList = [] + for i in range(nprocs): + istartList.append(i * toatalnumberofpairs // nprocs) + iendList.append((i + 1) * toatalnumberofpairs // nprocs) + print(f"{istartList=}") + print(f"{iendList=}") + + + + + + if rank == 0: + # start timing + perf_time_begin = perf_counter() + npairs = self._receivers.nstations*self._source.nsources + print(f"{npairs=}") + pairs_to_compute = np.empty((npairs, 2), dtype=np.int32) + + print ("Creating source matrix") + source_matrix = np.empty((self._source.nsources, 3), dtype=np.double) for i_psource, psource in enumerate(self._source): + source_matrix[i_psource, :] = psource.x + + print ("Creating receiver matrix") + receiver_matrix = np.empty((self._receivers.nstations, 3), dtype=np.double) + for i_station, station in enumerate(self._receivers): + receiver_matrix[i_station, :] = station.x + + z_rec = receiver_matrix[:,2] + z_src = source_matrix[:,2] + + # compute the distance matrix + print ("Computing the distance matrix") + dd = np.linalg.norm(receiver_matrix[:, np.newaxis, :] - source_matrix[np.newaxis, :, :], axis=2) + dd = dd.flatten() + + # compute the horizontal distance matrix + print ("Computing the horizontal distance matrix") + receiver_matrix = receiver_matrix[:,:2] + source_matrix = source_matrix[:,:2] + dh = np.linalg.norm(receiver_matrix[:, np.newaxis, :] - source_matrix[np.newaxis, :, :], axis=2) + dh 
= dh.flatten() + + + # compute the vertical distance matrix + print ("Computing the vertical distance matrix") + z_rec, z_src = np.meshgrid(z_rec, z_src, indexing='ij') + z_rec = z_rec.flatten() + z_src = z_src.flatten() + dv = np.abs(z_rec - z_src) + + comm.Barrier() + if nprocs > 1: + # create dh z_rec and z_src for all ranks + mini_dh = np.empty(iend-istart, dtype=np.double) + mini_z_rec = np.empty(iend-istart, dtype=np.double) + mini_z_src = np.empty(iend-istart, dtype=np.double) + if rank == 0: + # send sub dh, z_rec and z_src to all ranks + for i in range(1, nprocs): + istart = istartList[i] + iend = iendList[i] + comm.Send(dh[istart:iend], dest=i, tag=1) + comm.Send(z_rec[istart:iend], dest=i, tag=2) + comm.Send(z_src[istart:iend], dest=i, tag=3) + istart = istartList[0] + iend = iendList[0] + else: + comm.Recv(mini_dh, source=0, tag=1) + comm.Recv(mini_z_rec, source=0, tag=2) + comm.Recv(mini_z_src, source=0, tag=3) + elif nprocs == 1: + print("running on one processor") + istart = 0 + iend = toatalnumberofpairs + + comm.Barrier() + + + if using_vectorize_manner==True and longer_version==False: + if rank == 0: + # Creating bins + print ("Computing the bins") + eps = 1e-8 + dh_max , dh_min = np.max(dh), np.min(dh) + dh_bins = np.arange(dh_min, dh_max + eps, delta_h*factor) + + dzrec_max , dzrec_min = np.max(z_rec), np.min(z_rec) + dzrec_bins = np.arange(dzrec_min, dzrec_max + eps, delta_v_rec*factor) + dzsrc_max , dzsrc_min = np.max(z_src), np.min(z_src) + dzsrc_bins = np.arange(dzsrc_min, dzsrc_max + eps, delta_v_src*factor) - if n_computed_pairs >= npairs_max: - print("Exceeded number of pairs!!") - exit(0) + # computing total number of bins + num_bins = len(dh_bins) * len(dzrec_bins) * len(dzsrc_bins) + print(f"Total number of bins: {num_bins}") - t1 = perf_counter() + # digitizing the data + print ("Digitizing the data") + dh_digitized = np.digitize(dh, dh_bins) + z_rec_digitized = np.digitize(z_rec, dzrec_bins) + z_src_digitized = np.digitize(z_src, 
dzsrc_bins) - x_src = psource.x - x_rec = station.x - z_rec = station.x[2] - z_src = psource.x[2] - - d = x_rec - x_src - - dd = np.linalg.norm(d) - dh = np.linalg.norm(d[0:2]) - dv = np.abs(d[2]) - - # dists[ipair,0] = dh - # dists[ipair,1] = dv - - condition = lor(np.abs(dh - dh_of_pairs[:n_computed_pairs]) > delta_h, \ - np.abs(z_src - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ - np.abs(z_rec - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec) - - # condition = (np.abs(dd - dd_of_pairs[:n_computed_pairs]) < delta_h) & \ - # (np.abs(z_rec - zrec_of_pairs[:n_computed_pairs]) < delta_v) - # condition = (np.abs(dh - dh_of_pairs[:n_computed_pairs]) < delta_h) & \ - # (np.abs(z_src - zsrc_of_pairs[:n_computed_pairs]) < delta_v_src) &\ - # (np.abs(z_rec - zrec_of_pairs[:n_computed_pairs]) < delta_v_rec) - # condition = (np.abs(dh - dh_of_pairs[:n_computed_pairs]) < delta_h) & \ - # (np.abs(z_rec - zrec_of_pairs[:n_computed_pairs]) < delta_v) & \ - # (np.abs(z_src - zsrc_of_pairs[:n_computed_pairs]) < delta_v) - # (np.abs(dv - dv_of_pairs[:n_computed_pairs]) < delta_v) & \ - - if n_computed_pairs == 0 or np.all(condition): - pairs_to_compute[n_computed_pairs,:] = [i_station, i_psource] - dd_of_pairs[n_computed_pairs] = dd - dh_of_pairs[n_computed_pairs] = dh - dv_of_pairs[n_computed_pairs] = dv - zrec_of_pairs[n_computed_pairs] = z_rec - zsrc_of_pairs[n_computed_pairs] = z_src + + + # creat matrix of pairs + print ("Creating the pairs") + dh_digitized -= 1 + z_rec_digitized -= 1 + z_src_digitized -= 1 + # pairs_to_compute =[categories[dh_digitized[i], z_rec_digitized[i], z_src_digitized[i]] for i in range(npairs)] + pairs_to_compute = dh_digitized * len(dzrec_bins) * len(dzsrc_bins) + z_rec_digitized * len(dzsrc_bins) + z_src_digitized + pairs_to_compute, indices,reverseIndicies = np.unique(pairs_to_compute, return_index=True,return_inverse=True) + indices = np.sort(indices) + i_station = indices // len(self._source._pslist) + i_psource = indices % 
len(self._source._pslist) + + + + + + + + npairs_max = len(pairs_to_compute) + pairs_to_compute = np.empty((npairs_max, 2), dtype=np.int32) + dd_of_pairs = np.empty(npairs_max, dtype=np.double) + dh_of_pairs = np.empty(npairs_max, dtype=np.double) + dv_of_pairs = np.empty(npairs_max, dtype=np.double) + zrec_of_pairs = np.empty(npairs_max, dtype=np.double) + zsrc_of_pairs = np.empty(npairs_max, dtype=np.double) + + print(f"npairs_max = {npairs_max}") + # ==================================================================================================== + # optimizing the number of pairs + # ==================================================================================================== + + + + n_computed_pairs = 0 + print ("Optimizing the number of pairs") + print("\titeration 1") + print("\t\treducing the number of pairs:") + + # reducing the number of pairs + + for j in tqdm(range(len(indices))): + i = indices[j] + condition = lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec) + + if n_computed_pairs == 0 or np.all(condition): + pairs_to_compute[n_computed_pairs,:] = [i_station[j], i_psource[j]] + dd_of_pairs[n_computed_pairs] = dd[i] + dh_of_pairs[n_computed_pairs] = dh[i] + dv_of_pairs[n_computed_pairs] = dv[i] + zrec_of_pairs[n_computed_pairs] = z_rec[i] + zsrc_of_pairs[n_computed_pairs] = z_src[i] + + n_computed_pairs += 1 + + + # if j % 10000 == 0 or j == len(indices) - 1: + # print(f"\t\t\tOn {j=} of {npairs_max} {n_computed_pairs} ({n_computed_pairs/npairs*100}% reduction)") + + # wait for all ranks to reach here + comm.Barrier() + if nprocs ==1 : + # now check that the founded pairs cover all the points + notcoverd = [] + for i in range(len(dh)): + if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + 
np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): + notcoverd.append(i) + + # print(len(notcoverd)) + print(f"\t\tnumber of pairs founded: {n_computed_pairs}") + print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") + print(f"\t\tnumber of pairs notcovered yet: {len(notcoverd)}") + else: + + + if rank == 0: + # On rank 0, calculate or set the value of n_computed_pairs + n_computed_pairs = np.array(n_computed_pairs, dtype='i') # Example value + else: + # On other ranks, create an empty array to receive the value + n_computed_pairs = np.empty(1, dtype='i') + + # Broadcast the value from rank 0 to all other ranks + comm.Bcast(n_computed_pairs, root=0) + + # Extract the integer from the NumPy array + # check that if the array is zero dimensional + n_computed_pairs = n_computed_pairs.item() + + + + comm.Barrier() + + if rank > 0: + dh_of_pairs = np.empty(n_computed_pairs, dtype=np.double) + zrec_of_pairs = np.empty(n_computed_pairs, dtype=np.double) + zsrc_of_pairs = np.empty(n_computed_pairs, dtype=np.double) + + comm.Barrier() + # send dh_of_pairs, dv_of_pairs, zrec_of_pairs, zsrc_of_pairs to all ranks + if rank == 0: + for i in range(1, nprocs): + comm.Send(dh_of_pairs[:n_computed_pairs], dest=i, tag=5) + comm.Send(zrec_of_pairs[:n_computed_pairs], dest=i, tag=6) + comm.Send(zsrc_of_pairs[:n_computed_pairs], dest=i, tag=7) + else: + comm.Recv(dh_of_pairs, source=0, tag=5) + comm.Recv(zrec_of_pairs, source=0, tag=6) + comm.Recv(zsrc_of_pairs, source=0, tag=7) + comm.Barrier() + # now found uncovered points for each rank + notcoverd = [] + if rank == 0: + # measure the percentage of the cheking + print(f"\t\tchecking the uncovered points") + for i in tqdm(range(iend)): + if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): + notcoverd.append(i) + + else: + for i in 
range(len(mini_dh)): + if np.all(lor(np.abs(mini_dh[i] - dh_of_pairs) > delta_h, \ + np.abs(mini_z_src[i] - zsrc_of_pairs) > delta_v_src, \ + np.abs(mini_z_rec[i] - zrec_of_pairs) > delta_v_rec)): + notcoverd.append(i) + + # if rank == 0: + # print(f"{rank=} {notcoverd=}") + notcoverd = np.array(notcoverd) + istart + + comm.Barrier() + # print(f"{rank=} {len(notcoverd)=}") + # Gather the sizes of the notcoverd arrays from all processes + notcoverd_sizes = comm.gather(len(notcoverd), root=0) + + if rank == 0: + # Calculate the total size and create a buffer to hold all data + total_size = sum(notcoverd_sizes) + all_notcoverd = np.empty(total_size, dtype=int) + else: + all_notcoverd = None + + # Gather all notcoverd arrays to the root process + comm.Gatherv(sendbuf=notcoverd, recvbuf=(all_notcoverd, notcoverd_sizes), root=0) + notcoverd = all_notcoverd + if rank == 0: + print(f"\t\tnumber of pairs founded: {n_computed_pairs}") + print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") + print(f"\t\tnumber of pairs notcovered yet: {len(all_notcoverd)}") + + + + + + + # wait for all ranks to reach here + comm.Barrier() + if rank == 0: + + iteration = 2 + while len(notcoverd) > 0: + print(f"\titeration {iteration}") + dh_copy = dh[notcoverd] + z_rec_copy = z_rec[notcoverd] + z_src_copy = z_src[notcoverd] + indexes = np.array(notcoverd) + + # digitizing the data + dh_digitized = np.digitize(dh[notcoverd], dh_bins) + z_rec_digitized = np.digitize(z_rec[notcoverd], dzrec_bins) + z_src_digitized = np.digitize(z_src[notcoverd], dzsrc_bins) + + dh_digitized -= 1 + z_rec_digitized -= 1 + z_src_digitized -= 1 + + additional_pairs = dh_digitized * len(dzrec_bins) * len(dzsrc_bins) + z_rec_digitized * len(dzsrc_bins) + z_src_digitized + additional_pairs, indices = np.unique(additional_pairs, return_index=True) + indices = np.sort(indices) + indices = indexes[indices] + + + + print(f"\t\treducing the number of pairs:") + for j in tqdm(range(len(indices))): + i = 
indices[j] + condition = lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec) + if np.all(condition): + istation = i // len(self._source._pslist) + ipsource = i % len(self._source._pslist) + pairs_to_compute[n_computed_pairs,:] = [istation, ipsource] + dd_of_pairs[n_computed_pairs] = dd[i] + dh_of_pairs[n_computed_pairs] = dh[i] + dv_of_pairs[n_computed_pairs] = dv[i] + zrec_of_pairs[n_computed_pairs] = z_rec[i] + zsrc_of_pairs[n_computed_pairs] = z_src[i] + n_computed_pairs += 1 + # if (j+1) % 1000 == 0: + # print(f"\t\t\tOn {j=} of {len(indices)} {n_computed_pairs} ({n_computed_pairs/npairs*100}% reduction)") + + notcoverd = [] + print(f"\t\tchecking the uncovered points") + for j in tqdm(range(len(indexes))): + i = indexes[j] + if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): + notcoverd.append(i) + print(f"\t\tnumber of pairs founded: {n_computed_pairs}") + print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") + iteration += 1 + + + + print("Optimization done") + print(f"Number of pairs founded: {n_computed_pairs}") + print(f"Number of pairs skipped: {npairs_max - n_computed_pairs}") + print(f"Number of points not covered yet: {len(notcoverd)}") + # print timing + perf_time_end = perf_counter() + time = perf_time_end - perf_time_begin + # conver time to hours, minutes and seconds + print(f" Time: hours: {time//3600} minutes: {(time%3600)//60} seconds: {time%60}") + + + # if rank == 0: + # print("final check") + # # checking the not covered points + # notcoverd = [] + # for i in range(len(dh)): + # if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + # np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > 
delta_v_src, \ + # np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): + # notcoverd.append(i) + # # print(f"Error: {i} not found in the pairs notcoverd={notcoverd}") + + # print(f"\t\tnumber of pairs founded: {n_computed_pairs}") + # print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") + + comm.Barrier() + + if using_vectorize_manner==True and longer_version==True: + print("Using vectorize manner and longer version") + if rank == 0: + # make the factor 0.99 to avoid the error in the digitize function + factor = 0.98 + # Creating bins + print ("Computing the bins") + eps = 1e-8 + dh_max , dh_min = np.max(dh), np.min(dh) + # dh_bins = np.arange(dh_min, dh_max + eps, delta_h*factor) + num = (dh_max - dh_min) / (delta_h*factor) + # celing the number of bins + num = np.ceil(num) + num = int(num) + # bins should include the maximum value + dh_bins = np.linspace(dh_min-eps, dh_max+eps, num+1) + + + dzrec_max , dzrec_min = np.max(z_rec), np.min(z_rec) + num = (dzrec_max - dzrec_min) / (delta_v_rec*factor) + # celing the number of bins + num = np.ceil(num) + num = int(num) + # bins should include the maximum value + dzrec_bins = np.linspace(dzrec_min-eps, dzrec_max+eps, num+1) + + + dzsrc_max , dzsrc_min = np.max(z_src), np.min(z_src) + num = (dzsrc_max - dzsrc_min) / (delta_v_src*factor) + # celing the number of bins + num = np.ceil(num) + num = int(num) + # bins should include the maximum value + dzsrc_bins = np.linspace(dzsrc_min-eps, dzsrc_max+eps, num+1) + + + # verify that the bins differ by delta_h, delta_v_rec and delta_v_src + assert np.all(np.diff(dh_bins) - delta_h < 1e-8) + assert np.all(np.diff(dzrec_bins) - delta_v_rec < 1e-8) + assert np.all(np.diff(dzsrc_bins) - delta_v_src < 1e-8) + + + # computing total number of bins + num_bins = len(dh_bins) * len(dzrec_bins) * len(dzsrc_bins) + print(f"Total number of bins: {num_bins}") + + # digitizing the data + print ("Digitizing the data") + dh_digitized = np.digitize(dh, dh_bins) + 
z_rec_digitized = np.digitize(z_rec, dzrec_bins) + z_src_digitized = np.digitize(z_src, dzsrc_bins) + + + + + # creat matrix of pairs + print ("Creating the pairs") + dh_digitized -= 1 + z_rec_digitized -= 1 + z_src_digitized -= 1 + # pairs_to_compute =[categories[dh_digitized[i], z_rec_digitized[i], z_src_digitized[i]] for i in range(npairs)] + pairs_to_compute = dh_digitized * len(dzrec_bins) * len(dzsrc_bins) + z_rec_digitized * len(dzsrc_bins) + z_src_digitized + pairs_to_compute, indices,reverseIndicies = np.unique(pairs_to_compute, return_index=True,return_inverse=True) + indices = np.sort(indices) + i_station = indices // len(self._source._pslist) + i_psource = indices % len(self._source._pslist) + + + + + + + + npairs_max = len(pairs_to_compute) + pairs_to_compute = np.empty((npairs_max, 2), dtype=np.int32) + dd_of_pairs = np.empty(npairs_max, dtype=np.double) + dh_of_pairs = np.empty(npairs_max, dtype=np.double) + dv_of_pairs = np.empty(npairs_max, dtype=np.double) + zrec_of_pairs = np.empty(npairs_max, dtype=np.double) + zsrc_of_pairs = np.empty(npairs_max, dtype=np.double) + + print(f"npairs_max = {npairs_max}") + # ==================================================================================================== + # optimizing the number of pairs + # ==================================================================================================== + + + + n_computed_pairs = 0 + print ("Optimizing the number of pairs") + print("\titeration 1") + print("\t\treducing the number of pairs:") + + # reducing the number of pairs + + for j in tqdm(range(len(indices))): + i = indices[j] + pairs_to_compute[n_computed_pairs,:] = [i_station[j], i_psource[j]] + dd_of_pairs[n_computed_pairs] = dd[i] + dh_of_pairs[n_computed_pairs] = dh[i] + dv_of_pairs[n_computed_pairs] = dv[i] + zrec_of_pairs[n_computed_pairs] = z_rec[i] + zsrc_of_pairs[n_computed_pairs] = z_src[i] n_computed_pairs += 1 - t2 = perf_counter() - if ipair % 10000 == 0: - ETA = 
(t2-t1)*(npairs-ipair)/3600. - print(f"On {ipair=} of {npairs} {n_computed_pairs=} ({n_computed_pairs/npairs*100}% reduction) {ETA=}h") + if using_vectorize_manner==False: + srart = perf_counter() - ipair += 1 - - - pairs_to_compute = pairs_to_compute[:n_computed_pairs,:] - dd_of_pairs = dd_of_pairs[:n_computed_pairs] - dh_of_pairs = dh_of_pairs[:n_computed_pairs] - dv_of_pairs = dv_of_pairs[:n_computed_pairs] - zrec_of_pairs = zrec_of_pairs[:n_computed_pairs] - zsrc_of_pairs = zsrc_of_pairs[:n_computed_pairs] - - print(f"Need only {n_computed_pairs} pairs of {npairs} ({n_computed_pairs/npairs*100}% reduction)") - - if store_here is not None: - import h5py - with h5py.File(store_here + '.h5', 'w') as hf: - # hf.create_dataset("dists", data=dists) - hf.create_dataset("pairs_to_compute", data=pairs_to_compute) - hf.create_dataset("dd_of_pairs", data=dd_of_pairs) - hf.create_dataset("dh_of_pairs", data=dh_of_pairs) - hf.create_dataset("dv_of_pairs", data=dv_of_pairs) - hf.create_dataset("zrec_of_pairs", data=zrec_of_pairs) - hf.create_dataset("zsrc_of_pairs", data=zsrc_of_pairs) - hf.create_dataset("delta_h", data=delta_h) - hf.create_dataset("delta_v_rec", data=delta_v_rec) - hf.create_dataset("delta_v_src", data=delta_v_src) + ipair = 0 + + npairs = self._receivers.nstations*len(self._source._pslist) + + print(f"{npairs=}") + + # dists = np.zeros((npairs, 2)) + + pairs_to_compute = np.empty((npairs_max, 2), dtype=np.int32) + dd_of_pairs = np.empty(npairs_max, dtype=np.double) + dh_of_pairs = np.empty(npairs_max, dtype=np.double) + dv_of_pairs = np.empty(npairs_max, dtype=np.double) + zrec_of_pairs = np.empty(npairs_max, dtype=np.double) + zsrc_of_pairs = np.empty(npairs_max, dtype=np.double) + + # Initialize the counter for the number of computed pairs. 
+ n_computed_pairs = 0 + + # def lor(a,b,c): + # return np.logical_or(a,np.logical_or(b,c)) + + + + for i_station, station in enumerate(self._receivers): + for i_psource, psource in enumerate(self._source): + + + if n_computed_pairs >= npairs_max: + print("Exceeded number of pairs!!") + exit(0) + + t1 = perf_counter() + + x_src = psource.x + x_rec = station.x + + z_rec = station.x[2] + z_src = psource.x[2] + + d = x_rec - x_src + + dd = np.linalg.norm(d) + dh = np.linalg.norm(d[0:2]) + dv = np.abs(d[2]) + + # dists[ipair,0] = dh + # dists[ipair,1] = dv + + condition = lor(np.abs(dh - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + np.abs(z_src - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + np.abs(z_rec - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec) + + # condition = (np.abs(dd - dd_of_pairs[:n_computed_pairs]) < delta_h) & \ + # (np.abs(z_rec - zrec_of_pairs[:n_computed_pairs]) < delta_v) + # condition = (np.abs(dh - dh_of_pairs[:n_computed_pairs]) < delta_h) & \ + # (np.abs(z_src - zsrc_of_pairs[:n_computed_pairs]) < delta_v_src) &\ + # (np.abs(z_rec - zrec_of_pairs[:n_computed_pairs]) < delta_v_rec) + # condition = (np.abs(dh - dh_of_pairs[:n_computed_pairs]) < delta_h) & \ + # (np.abs(z_rec - zrec_of_pairs[:n_computed_pairs]) < delta_v) & \ + # (np.abs(z_src - zsrc_of_pairs[:n_computed_pairs]) < delta_v) + # (np.abs(dv - dv_of_pairs[:n_computed_pairs]) < delta_v) & \ + + if n_computed_pairs == 0 or np.all(condition): + pairs_to_compute[n_computed_pairs,:] = [i_station, i_psource] + dd_of_pairs[n_computed_pairs] = dd + dh_of_pairs[n_computed_pairs] = dh + dv_of_pairs[n_computed_pairs] = dv + zrec_of_pairs[n_computed_pairs] = z_rec + zsrc_of_pairs[n_computed_pairs] = z_src + + n_computed_pairs += 1 + # print on normal file with 4 percision + t2 = perf_counter() + + if ipair % 10000 == 0: + ETA = (t2-t1)*(npairs-ipair)/3600. 
+ print(f"On {ipair=} of {npairs} {n_computed_pairs=} ({n_computed_pairs/npairs*100}% reduction) {ETA=}h") + + ipair += 1 + end = perf_counter() + + print(f"Time: hours: {(end-srart)//3600} minutes: {((end-srart)%3600)//60} seconds: {(end-srart)%60}") + + + if rank == 0: + pairs_to_compute = pairs_to_compute[:n_computed_pairs,:] + dd_of_pairs = dd_of_pairs[:n_computed_pairs] + dh_of_pairs = dh_of_pairs[:n_computed_pairs] + dv_of_pairs = dv_of_pairs[:n_computed_pairs] + zrec_of_pairs = zrec_of_pairs[:n_computed_pairs] + zsrc_of_pairs = zsrc_of_pairs[:n_computed_pairs] + + print(f"Need only {n_computed_pairs} pairs of {npairs} ({n_computed_pairs/npairs*100}% reduction)") + + if store_here is not None: + import h5py + with h5py.File(store_here + '.h5', 'w') as hf: + # hf.create_dataset("dists", data=dists) + hf.create_dataset("pairs_to_compute", data=pairs_to_compute) + hf.create_dataset("dd_of_pairs", data=dd_of_pairs) + hf.create_dataset("dh_of_pairs", data=dh_of_pairs) + hf.create_dataset("dv_of_pairs", data=dv_of_pairs) + hf.create_dataset("zrec_of_pairs", data=zrec_of_pairs) + hf.create_dataset("zsrc_of_pairs", data=zsrc_of_pairs) + hf.create_dataset("delta_h", data=delta_h) + hf.create_dataset("delta_v_rec", data=delta_v_rec) + hf.create_dataset("delta_v_src", data=delta_v_src) # return dists, pairs_to_compute, dh_of_pairs, dv_of_pairs, zrec_of_pairs, zrec_of_pairs From ec886d3077be667db8450a3e73cb0c664baf9523 Mon Sep 17 00:00:00 2001 From: amnp95 Date: Wed, 11 Sep 2024 03:22:49 -0700 Subject: [PATCH 02/17] modifying the parallel compute green function pairs --- shakermaker/shakermaker.py | 151 ++++++++++++++++++++----------------- 1 file changed, 82 insertions(+), 69 deletions(-) diff --git a/shakermaker/shakermaker.py b/shakermaker/shakermaker.py index 9574594..c12b813 100644 --- a/shakermaker/shakermaker.py +++ b/shakermaker/shakermaker.py @@ -1809,6 +1809,7 @@ def lor(a, b, c): # wait for all ranks to reach here comm.Barrier() + if nprocs ==1 : # now check 
that the founded pairs cover all the points notcoverd = [] @@ -1823,8 +1824,6 @@ def lor(a, b, c): print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") print(f"\t\tnumber of pairs notcovered yet: {len(notcoverd)}") else: - - if rank == 0: # On rank 0, calculate or set the value of n_computed_pairs n_computed_pairs = np.array(n_computed_pairs, dtype='i') # Example value @@ -1842,65 +1841,69 @@ def lor(a, b, c): comm.Barrier() - - if rank > 0: - dh_of_pairs = np.empty(n_computed_pairs, dtype=np.double) - zrec_of_pairs = np.empty(n_computed_pairs, dtype=np.double) - zsrc_of_pairs = np.empty(n_computed_pairs, dtype=np.double) - - comm.Barrier() - # send dh_of_pairs, dv_of_pairs, zrec_of_pairs, zsrc_of_pairs to all ranks - if rank == 0: - for i in range(1, nprocs): - comm.Send(dh_of_pairs[:n_computed_pairs], dest=i, tag=5) - comm.Send(zrec_of_pairs[:n_computed_pairs], dest=i, tag=6) - comm.Send(zsrc_of_pairs[:n_computed_pairs], dest=i, tag=7) - else: - comm.Recv(dh_of_pairs, source=0, tag=5) - comm.Recv(zrec_of_pairs, source=0, tag=6) - comm.Recv(zsrc_of_pairs, source=0, tag=7) - comm.Barrier() - # now found uncovered points for each rank - notcoverd = [] - if rank == 0: - # measure the percentage of the cheking - print(f"\t\tchecking the uncovered points") - for i in tqdm(range(iend)): - if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ - np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ - np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): - notcoverd.append(i) - - else: - for i in range(len(mini_dh)): - if np.all(lor(np.abs(mini_dh[i] - dh_of_pairs) > delta_h, \ - np.abs(mini_z_src[i] - zsrc_of_pairs) > delta_v_src, \ - np.abs(mini_z_rec[i] - zrec_of_pairs) > delta_v_rec)): - notcoverd.append(i) + def get_not_covered_points(rank, nprocs): + if rank > 0: + dh_of_pairs = np.empty(n_computed_pairs, dtype=np.double) + zrec_of_pairs = np.empty(n_computed_pairs, dtype=np.double) + zsrc_of_pairs = 
np.empty(n_computed_pairs, dtype=np.double) + + comm.Barrier() + # send dh_of_pairs, dv_of_pairs, zrec_of_pairs, zsrc_of_pairs to all ranks + if rank == 0: + for i in range(1, nprocs): + comm.Send(dh_of_pairs[:n_computed_pairs], dest=i, tag=5) + comm.Send(zrec_of_pairs[:n_computed_pairs], dest=i, tag=6) + comm.Send(zsrc_of_pairs[:n_computed_pairs], dest=i, tag=7) + else: + comm.Recv(dh_of_pairs, source=0, tag=5) + comm.Recv(zrec_of_pairs, source=0, tag=6) + comm.Recv(zsrc_of_pairs, source=0, tag=7) + comm.Barrier() + # now found uncovered points for each rank + notcoverd = [] + if rank == 0: + # measure the percentage of the cheking + print(f"\t\tchecking the uncovered points") + for i in tqdm(range(iend)): + if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): + notcoverd.append(i) + + else: + for i in range(len(mini_dh)): + if np.all(lor(np.abs(mini_dh[i] - dh_of_pairs) > delta_h, \ + np.abs(mini_z_src[i] - zsrc_of_pairs) > delta_v_src, \ + np.abs(mini_z_rec[i] - zrec_of_pairs) > delta_v_rec)): + notcoverd.append(i) + + # if rank == 0: + # print(f"{rank=} {notcoverd=}") + notcoverd = np.array(notcoverd) + istart - # if rank == 0: - # print(f"{rank=} {notcoverd=}") - notcoverd = np.array(notcoverd) + istart - - comm.Barrier() - # print(f"{rank=} {len(notcoverd)=}") - # Gather the sizes of the notcoverd arrays from all processes - notcoverd_sizes = comm.gather(len(notcoverd), root=0) + comm.Barrier() + # print(f"{rank=} {len(notcoverd)=}") + # Gather the sizes of the notcoverd arrays from all processes + notcoverd_sizes = comm.gather(len(notcoverd), root=0) - if rank == 0: - # Calculate the total size and create a buffer to hold all data - total_size = sum(notcoverd_sizes) - all_notcoverd = np.empty(total_size, dtype=int) - else: - all_notcoverd = None + if rank == 0: + # Calculate the total size and 
create a buffer to hold all data + total_size = sum(notcoverd_sizes) + all_notcoverd = np.empty(total_size, dtype=int) + else: + all_notcoverd = None + + # Gather all notcoverd arrays to the root process + comm.Gatherv(sendbuf=notcoverd, recvbuf=(all_notcoverd, notcoverd_sizes), root=0) + notcoverd = all_notcoverd + if rank == 0: + print(f"\t\tnumber of pairs founded: {n_computed_pairs}") + print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") + print(f"\t\tnumber of pairs notcovered yet: {len(all_notcoverd)}") + return notcoverd + - # Gather all notcoverd arrays to the root process - comm.Gatherv(sendbuf=notcoverd, recvbuf=(all_notcoverd, notcoverd_sizes), root=0) - notcoverd = all_notcoverd - if rank == 0: - print(f"\t\tnumber of pairs founded: {n_computed_pairs}") - print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") - print(f"\t\tnumber of pairs notcovered yet: {len(all_notcoverd)}") + notcoverd = get_not_covered_points(rank, nprocs) @@ -1956,19 +1959,29 @@ def lor(a, b, c): # print(f"\t\t\tOn {j=} of {len(indices)} {n_computed_pairs} ({n_computed_pairs/npairs*100}% reduction)") notcoverd = [] - print(f"\t\tchecking the uncovered points") - for j in tqdm(range(len(indexes))): - i = indexes[j] - if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ - np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ - np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): - notcoverd.append(i) - print(f"\t\tnumber of pairs founded: {n_computed_pairs}") - print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") + + # print(f"\t\tchecking the uncovered points") + # for j in tqdm(range(len(indexes))): + # i = indexes[j] + # if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + # np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + # np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): + # notcoverd.append(i) + # print(f"\t\tnumber 
of pairs founded: {n_computed_pairs}") + # print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") + notcoverd = get_not_covered_points(rank, nprocs) iteration += 1 - + # final check for the not covered points + if nprocs > 1: + notcoverd = get_not_covered_points(rank, nprocs) + if rank == 0: + print("final check") + print(f"\t\tnumber of pairs founded: {n_computed_pairs}") + print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") + + print("Optimization done") print(f"Number of pairs founded: {n_computed_pairs}") print(f"Number of pairs skipped: {npairs_max - n_computed_pairs}") @@ -1996,7 +2009,7 @@ def lor(a, b, c): comm.Barrier() - if using_vectorize_manner==True and longer_version==True: + if (using_vectorize_manner==True) and (longer_version==True): print("Using vectorize manner and longer version") if rank == 0: # make the factor 0.99 to avoid the error in the digitize function From 5c6067298db59973992cf456dd833357520f6965 Mon Sep 17 00:00:00 2001 From: amnp95 Date: Wed, 11 Sep 2024 03:49:42 -0700 Subject: [PATCH 03/17] modifying the parallel compute green function pairs 2 --- shakermaker/shakermaker.py | 146 +++++++++++++++++-------------------- 1 file changed, 68 insertions(+), 78 deletions(-) diff --git a/shakermaker/shakermaker.py b/shakermaker/shakermaker.py index c12b813..20f7d34 100644 --- a/shakermaker/shakermaker.py +++ b/shakermaker/shakermaker.py @@ -1841,69 +1841,65 @@ def lor(a, b, c): comm.Barrier() - def get_not_covered_points(rank, nprocs): - if rank > 0: - dh_of_pairs = np.empty(n_computed_pairs, dtype=np.double) - zrec_of_pairs = np.empty(n_computed_pairs, dtype=np.double) - zsrc_of_pairs = np.empty(n_computed_pairs, dtype=np.double) - - comm.Barrier() - # send dh_of_pairs, dv_of_pairs, zrec_of_pairs, zsrc_of_pairs to all ranks - if rank == 0: - for i in range(1, nprocs): - comm.Send(dh_of_pairs[:n_computed_pairs], dest=i, tag=5) - comm.Send(zrec_of_pairs[:n_computed_pairs], dest=i, tag=6) - 
comm.Send(zsrc_of_pairs[:n_computed_pairs], dest=i, tag=7) - else: - comm.Recv(dh_of_pairs, source=0, tag=5) - comm.Recv(zrec_of_pairs, source=0, tag=6) - comm.Recv(zsrc_of_pairs, source=0, tag=7) - comm.Barrier() - # now found uncovered points for each rank - notcoverd = [] - if rank == 0: - # measure the percentage of the cheking - print(f"\t\tchecking the uncovered points") - for i in tqdm(range(iend)): - if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ - np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ - np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): - notcoverd.append(i) - - else: - for i in range(len(mini_dh)): - if np.all(lor(np.abs(mini_dh[i] - dh_of_pairs) > delta_h, \ - np.abs(mini_z_src[i] - zsrc_of_pairs) > delta_v_src, \ - np.abs(mini_z_rec[i] - zrec_of_pairs) > delta_v_rec)): - notcoverd.append(i) - - # if rank == 0: - # print(f"{rank=} {notcoverd=}") - notcoverd = np.array(notcoverd) + istart - - comm.Barrier() - # print(f"{rank=} {len(notcoverd)=}") - # Gather the sizes of the notcoverd arrays from all processes - notcoverd_sizes = comm.gather(len(notcoverd), root=0) - if rank == 0: - # Calculate the total size and create a buffer to hold all data - total_size = sum(notcoverd_sizes) - all_notcoverd = np.empty(total_size, dtype=int) - else: - all_notcoverd = None - - # Gather all notcoverd arrays to the root process - comm.Gatherv(sendbuf=notcoverd, recvbuf=(all_notcoverd, notcoverd_sizes), root=0) - notcoverd = all_notcoverd - if rank == 0: - print(f"\t\tnumber of pairs founded: {n_computed_pairs}") - print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") - print(f"\t\tnumber of pairs notcovered yet: {len(all_notcoverd)}") - return notcoverd + if rank > 0: + dh_of_pairs = np.empty(n_computed_pairs, dtype=np.double) + zrec_of_pairs = np.empty(n_computed_pairs, dtype=np.double) + zsrc_of_pairs = np.empty(n_computed_pairs, dtype=np.double) + comm.Barrier() + # send 
dh_of_pairs, dv_of_pairs, zrec_of_pairs, zsrc_of_pairs to all ranks + if rank == 0: + for i in range(1, nprocs): + comm.Send(dh_of_pairs[:n_computed_pairs], dest=i, tag=5) + comm.Send(zrec_of_pairs[:n_computed_pairs], dest=i, tag=6) + comm.Send(zsrc_of_pairs[:n_computed_pairs], dest=i, tag=7) + else: + comm.Recv(dh_of_pairs, source=0, tag=5) + comm.Recv(zrec_of_pairs, source=0, tag=6) + comm.Recv(zsrc_of_pairs, source=0, tag=7) + comm.Barrier() + # now found uncovered points for each rank + notcoverd = [] + if rank == 0: + # measure the percentage of the cheking + print(f"\t\tchecking the uncovered points") + for i in tqdm(range(iend)): + if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): + notcoverd.append(i) + + else: + for i in range(len(mini_dh)): + if np.all(lor(np.abs(mini_dh[i] - dh_of_pairs) > delta_h, \ + np.abs(mini_z_src[i] - zsrc_of_pairs) > delta_v_src, \ + np.abs(mini_z_rec[i] - zrec_of_pairs) > delta_v_rec)): + notcoverd.append(i) + + # if rank == 0: + # print(f"{rank=} {notcoverd=}") + notcoverd = np.array(notcoverd) + istart + + comm.Barrier() + # print(f"{rank=} {len(notcoverd)=}") + # Gather the sizes of the notcoverd arrays from all processes + notcoverd_sizes = comm.gather(len(notcoverd), root=0) + + if rank == 0: + # Calculate the total size and create a buffer to hold all data + total_size = sum(notcoverd_sizes) + all_notcoverd = np.empty(total_size, dtype=int) + else: + all_notcoverd = None - notcoverd = get_not_covered_points(rank, nprocs) + # Gather all notcoverd arrays to the root process + comm.Gatherv(sendbuf=notcoverd, recvbuf=(all_notcoverd, notcoverd_sizes), root=0) + notcoverd = all_notcoverd + if rank == 0: + print(f"\t\tnumber of pairs founded: {n_computed_pairs}") + print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") + print(f"\t\tnumber of pairs 
notcovered yet: {len(all_notcoverd)}") @@ -1959,27 +1955,21 @@ def get_not_covered_points(rank, nprocs): # print(f"\t\t\tOn {j=} of {len(indices)} {n_computed_pairs} ({n_computed_pairs/npairs*100}% reduction)") notcoverd = [] - - # print(f"\t\tchecking the uncovered points") - # for j in tqdm(range(len(indexes))): - # i = indexes[j] - # if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ - # np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ - # np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): - # notcoverd.append(i) - # print(f"\t\tnumber of pairs founded: {n_computed_pairs}") - # print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") - notcoverd = get_not_covered_points(rank, nprocs) + print(f"\t\tchecking the uncovered points") + for j in tqdm(range(len(indexes))): + i = indexes[j] + if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): + notcoverd.append(i) + print(f"\t\tnumber of pairs founded: {n_computed_pairs}") + print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") iteration += 1 - # final check for the not covered points - if nprocs > 1: - notcoverd = get_not_covered_points(rank, nprocs) - if rank == 0: - print("final check") - print(f"\t\tnumber of pairs founded: {n_computed_pairs}") - print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") + # final checking + # for j in tqdm(range(len(dh))): + print("Optimization done") From 44c10af9ef6355ef3ce73d4f5060167609f3fafd Mon Sep 17 00:00:00 2001 From: Amin Pakzad Date: Wed, 11 Sep 2024 19:41:57 -0700 Subject: [PATCH 04/17] modifying the parallel compute green function pairs 3 --- shakermaker/shakermaker.py | 271 ++++++++++++++++++------------------- 1 file changed, 133 insertions(+), 138 deletions(-) diff --git a/shakermaker/shakermaker.py 
b/shakermaker/shakermaker.py index 20f7d34..ededa9d 100644 --- a/shakermaker/shakermaker.py +++ b/shakermaker/shakermaker.py @@ -913,6 +913,7 @@ def printMPI(*args): ipair_target = best_match_index else: print(f"No suitable match found! {allow_out_of_bounds=} {min_distance=}") + print(f"{rank=} {i_station=} {i_psource=}") if ipair_target == len(dh_of_pairs): print("Target not found in database -- SKIPPING") @@ -1564,7 +1565,7 @@ def gen_greens_function_database_pairs(self, npairs_max=100000, using_vectorize_manner=False, cfactor = 0.5, - longer_version=True + finalcheck=True, ): """Run the simulation. @@ -1684,7 +1685,6 @@ def lor(a, b, c): dh = np.linalg.norm(receiver_matrix[:, np.newaxis, :] - source_matrix[np.newaxis, :, :], axis=2) dh = dh.flatten() - # compute the vertical distance matrix print ("Computing the vertical distance matrix") z_rec, z_src = np.meshgrid(z_rec, z_src, indexing='ij') @@ -1711,28 +1711,31 @@ def lor(a, b, c): else: comm.Recv(mini_dh, source=0, tag=1) comm.Recv(mini_z_rec, source=0, tag=2) - comm.Recv(mini_z_src, source=0, tag=3) - elif nprocs == 1: - print("running on one processor") - istart = 0 - iend = toatalnumberofpairs + comm.Recv(mini_z_src, source=0, tag=3) comm.Barrier() - if using_vectorize_manner==True and longer_version==False: + if using_vectorize_manner==True: if rank == 0: # Creating bins print ("Computing the bins") eps = 1e-8 dh_max , dh_min = np.max(dh), np.min(dh) - dh_bins = np.arange(dh_min, dh_max + eps, delta_h*factor) + binsnumber = int((dh_max - dh_min) // (delta_h*factor) + 1) + dh_bins = np.linspace(dh_min, dh_max + eps, binsnumber+1) + assert np.all(np.diff(dh_bins) <= delta_h*factor) + dzrec_max , dzrec_min = np.max(z_rec), np.min(z_rec) - dzrec_bins = np.arange(dzrec_min, dzrec_max + eps, delta_v_rec*factor) + binsnumber = int((dzrec_max - dzrec_min) // (delta_v_rec*factor) + 1) + dzrec_bins = np.linspace(dzrec_min, dzrec_max + eps, binsnumber+1) + assert np.all(np.diff(dzrec_bins) <= delta_v_rec*factor) 
dzsrc_max , dzsrc_min = np.max(z_src), np.min(z_src) - dzsrc_bins = np.arange(dzsrc_min, dzsrc_max + eps, delta_v_src*factor) + binsnumber = int((dzsrc_max - dzsrc_min) // (delta_v_src*factor) + 1) + dzsrc_bins = np.linspace(dzsrc_min, dzsrc_max + eps, binsnumber+1) + assert np.all(np.diff(dzsrc_bins) <= delta_v_src*factor) # computing total number of bins num_bins = len(dh_bins) * len(dzrec_bins) * len(dzsrc_bins) @@ -1803,17 +1806,17 @@ def lor(a, b, c): n_computed_pairs += 1 - + print(f"\t\tnumber of pairs founded: {n_computed_pairs}") # if j % 10000 == 0 or j == len(indices) - 1: # print(f"\t\t\tOn {j=} of {npairs_max} {n_computed_pairs} ({n_computed_pairs/npairs*100}% reduction)") # wait for all ranks to reach here comm.Barrier() - if nprocs ==1 : + print("using single processor:") # now check that the founded pairs cover all the points notcoverd = [] - for i in range(len(dh)): + for i in tqdm(range(len(dh))): if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): @@ -1823,7 +1826,11 @@ def lor(a, b, c): print(f"\t\tnumber of pairs founded: {n_computed_pairs}") print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") print(f"\t\tnumber of pairs notcovered yet: {len(notcoverd)}") + # print notcoverd in a file + np.savetxt("notcoverd_single.txt", notcoverd, fmt='%d') else: + + if rank == 0: # On rank 0, calculate or set the value of n_computed_pairs n_computed_pairs = np.array(n_computed_pairs, dtype='i') # Example value @@ -1900,6 +1907,8 @@ def lor(a, b, c): print(f"\t\tnumber of pairs founded: {n_computed_pairs}") print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") print(f"\t\tnumber of pairs notcovered yet: {len(all_notcoverd)}") + # print notcoverd in a file + np.savetxt("notcoverd_multi.txt", notcoverd, fmt='%d') @@ -1920,6 +1929,24 @@ def lor(a, b, c): # digitizing the 
data + eps = 1e-8 + factor = 0.99 + dh_max , dh_min = np.max(dh), np.min(dh) + binsnumber = int((dh_max - dh_min) // (delta_h*factor) + 1) + dh_bins = np.linspace(dh_min-eps, dh_max + eps, binsnumber+1) + assert np.all(np.diff(dh_bins) <= delta_h*factor) + + + dzrec_max , dzrec_min = np.max(z_rec), np.min(z_rec) + binsnumber = int((dzrec_max - dzrec_min) // (delta_v_rec*factor) + 1) + dzrec_bins = np.linspace(dzrec_min-eps, dzrec_max + eps, binsnumber+1) + assert np.all(np.diff(dzrec_bins) <= delta_v_rec*factor) + + dzsrc_max , dzsrc_min = np.max(z_src), np.min(z_src) + binsnumber = int((dzsrc_max - dzsrc_min) // (delta_v_src*factor) + 1) + dzsrc_bins = np.linspace(dzsrc_min-eps, dzsrc_max + eps, binsnumber+1) + assert np.all(np.diff(dzsrc_bins) <= delta_v_src*factor) + dh_digitized = np.digitize(dh[notcoverd], dh_bins) z_rec_digitized = np.digitize(z_rec[notcoverd], dzrec_bins) z_src_digitized = np.digitize(z_src[notcoverd], dzsrc_bins) @@ -1933,15 +1960,17 @@ def lor(a, b, c): indices = np.sort(indices) indices = indexes[indices] + - print(f"\t\treducing the number of pairs:") + # print(f"\t\treducing the number of pairs:") for j in tqdm(range(len(indices))): i = indices[j] - condition = lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ - np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ - np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec) - if np.all(condition): + # condition = lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + # np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + # np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec) + # if np.all(condition): + if True: istation = i // len(self._source._pslist) ipsource = i % len(self._source._pslist) pairs_to_compute[n_computed_pairs,:] = [istation, ipsource] @@ -1955,23 +1984,33 @@ def lor(a, b, c): # print(f"\t\t\tOn {j=} of {len(indices)} {n_computed_pairs} ({n_computed_pairs/npairs*100}% reduction)") notcoverd 
= [] - print(f"\t\tchecking the uncovered points") - for j in tqdm(range(len(indexes))): - i = indexes[j] - if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ - np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ - np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): - notcoverd.append(i) - print(f"\t\tnumber of pairs founded: {n_computed_pairs}") - print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") + # print(f"\t\tchecking the uncovered points") + # for j in tqdm(range(len(indexes))): + # i = indexes[j] + # if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + # np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + # np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): + # notcoverd.append(i) + # print(f"\t\tnumber of pairs founded: {n_computed_pairs}") + # print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") iteration += 1 + # if finalcheck: + # if rank == 0: + # print("final check") + # # checking the not covered points + # notcoverd = [] + # for i in range(len(dh)): + # if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + # np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + # np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): + # notcoverd.append(i) + # if i % (len(dh)//10) == 0: + # print(f"{((i+1)//len(dh)*100)}%") + # print(f"\t\tnumber of pairs founded: {n_computed_pairs}") + # print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") + # print timin - # final checking - # for j in tqdm(range(len(dh))): - - - print("Optimization done") print(f"Number of pairs founded: {n_computed_pairs}") print(f"Number of pairs skipped: {npairs_max - n_computed_pairs}") @@ -1999,111 +2038,6 @@ def lor(a, b, c): comm.Barrier() - if (using_vectorize_manner==True) and (longer_version==True): - print("Using vectorize manner and longer version") - if rank == 0: - # make the 
factor 0.99 to avoid the error in the digitize function - factor = 0.98 - # Creating bins - print ("Computing the bins") - eps = 1e-8 - dh_max , dh_min = np.max(dh), np.min(dh) - # dh_bins = np.arange(dh_min, dh_max + eps, delta_h*factor) - num = (dh_max - dh_min) / (delta_h*factor) - # celing the number of bins - num = np.ceil(num) - num = int(num) - # bins should include the maximum value - dh_bins = np.linspace(dh_min-eps, dh_max+eps, num+1) - - - dzrec_max , dzrec_min = np.max(z_rec), np.min(z_rec) - num = (dzrec_max - dzrec_min) / (delta_v_rec*factor) - # celing the number of bins - num = np.ceil(num) - num = int(num) - # bins should include the maximum value - dzrec_bins = np.linspace(dzrec_min-eps, dzrec_max+eps, num+1) - - - dzsrc_max , dzsrc_min = np.max(z_src), np.min(z_src) - num = (dzsrc_max - dzsrc_min) / (delta_v_src*factor) - # celing the number of bins - num = np.ceil(num) - num = int(num) - # bins should include the maximum value - dzsrc_bins = np.linspace(dzsrc_min-eps, dzsrc_max+eps, num+1) - - - # verify that the bins differ by delta_h, delta_v_rec and delta_v_src - assert np.all(np.diff(dh_bins) - delta_h < 1e-8) - assert np.all(np.diff(dzrec_bins) - delta_v_rec < 1e-8) - assert np.all(np.diff(dzsrc_bins) - delta_v_src < 1e-8) - - - # computing total number of bins - num_bins = len(dh_bins) * len(dzrec_bins) * len(dzsrc_bins) - print(f"Total number of bins: {num_bins}") - - # digitizing the data - print ("Digitizing the data") - dh_digitized = np.digitize(dh, dh_bins) - z_rec_digitized = np.digitize(z_rec, dzrec_bins) - z_src_digitized = np.digitize(z_src, dzsrc_bins) - - - - - # creat matrix of pairs - print ("Creating the pairs") - dh_digitized -= 1 - z_rec_digitized -= 1 - z_src_digitized -= 1 - # pairs_to_compute =[categories[dh_digitized[i], z_rec_digitized[i], z_src_digitized[i]] for i in range(npairs)] - pairs_to_compute = dh_digitized * len(dzrec_bins) * len(dzsrc_bins) + z_rec_digitized * len(dzsrc_bins) + z_src_digitized - 
pairs_to_compute, indices,reverseIndicies = np.unique(pairs_to_compute, return_index=True,return_inverse=True) - indices = np.sort(indices) - i_station = indices // len(self._source._pslist) - i_psource = indices % len(self._source._pslist) - - - - - - - - npairs_max = len(pairs_to_compute) - pairs_to_compute = np.empty((npairs_max, 2), dtype=np.int32) - dd_of_pairs = np.empty(npairs_max, dtype=np.double) - dh_of_pairs = np.empty(npairs_max, dtype=np.double) - dv_of_pairs = np.empty(npairs_max, dtype=np.double) - zrec_of_pairs = np.empty(npairs_max, dtype=np.double) - zsrc_of_pairs = np.empty(npairs_max, dtype=np.double) - - print(f"npairs_max = {npairs_max}") - # ==================================================================================================== - # optimizing the number of pairs - # ==================================================================================================== - - - - n_computed_pairs = 0 - print ("Optimizing the number of pairs") - print("\titeration 1") - print("\t\treducing the number of pairs:") - - # reducing the number of pairs - - for j in tqdm(range(len(indices))): - i = indices[j] - pairs_to_compute[n_computed_pairs,:] = [i_station[j], i_psource[j]] - dd_of_pairs[n_computed_pairs] = dd[i] - dh_of_pairs[n_computed_pairs] = dh[i] - dv_of_pairs[n_computed_pairs] = dv[i] - zrec_of_pairs[n_computed_pairs] = z_rec[i] - zsrc_of_pairs[n_computed_pairs] = z_src[i] - n_computed_pairs += 1 - if using_vectorize_manner==False: srart = perf_counter() @@ -2219,6 +2153,67 @@ def lor(a, b, c): # return dists, pairs_to_compute, dh_of_pairs, dv_of_pairs, zrec_of_pairs, zrec_of_pairs return + + + def check_greens_function_database_pairs(self, h5_database_name, delta_h=0.04, delta_v_rec=0.002, delta_v_src=0.2): + import h5py + if rank == 0: + hfile = h5py.File(h5_database_name + '.h5', 'r+') + + pairs_to_compute = hfile["/pairs_to_compute"][:] + dh_of_pairs = hfile["/dh_of_pairs"][:] + zrec_of_pairs = hfile["/zrec_of_pairs"][:] + 
zsrc_of_pairs = hfile["/zsrc_of_pairs"][:] + f = open("check.txt", "w") + bar = tqdm(total=self._receivers.nstations) + bar2 = tqdm(total=self._source.nsources) + for i_station, station in enumerate(self._receivers): + bar.update(1) + bar2.reset() + for i_psource, psource in enumerate(self._source): + bar2.update(1) + + x_src = psource.x + x_rec = station.x + + z_src = psource.x[2] + z_rec = station.x[2] + + d = x_rec - x_src + dh = np.sqrt(np.dot(d[0:2],d[0:2])) + + min_distance = float('inf') + best_match_index = -1 + + for i in range(len(dh_of_pairs)): + dh_p, zrec_p, zsrc_p = dh_of_pairs[i], zrec_of_pairs[i], zsrc_of_pairs[i] + + # Check if the current pair is within the tolerances + if (abs(dh - dh_p) < delta_h and abs(z_src - zsrc_p) < delta_v_src and abs(z_rec - zrec_p) < delta_v_rec): + best_match_index = i + + if best_match_index == -1: + # print(f"No suitable match found! {min_distance=}") + # print(f"{rank=} {i_station=} {i_psource=}") + # write on the file + f.write(f"{rank=} {i_station=} {i_psource=}\n") + if i_psource % 1000 == 0: + # flush the file + f.flush() + f.close() + bar.close() + bar2.close() + + + + + + + + + + + def write(self, writer): writer.write(self._receivers) From 187df6af463109bc1cb99034b547799af91597d9 Mon Sep 17 00:00:00 2001 From: amnp95 Date: Sun, 15 Sep 2024 13:28:04 -0700 Subject: [PATCH 05/17] modifying the parallel compute green function pairs 4 --- shakermaker/shakermaker.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/shakermaker/shakermaker.py b/shakermaker/shakermaker.py index ededa9d..2dde715 100644 --- a/shakermaker/shakermaker.py +++ b/shakermaker/shakermaker.py @@ -1646,8 +1646,8 @@ def lor(a, b, c): for i in range(nprocs): istartList.append(i * toatalnumberofpairs // nprocs) iendList.append((i + 1) * toatalnumberofpairs // nprocs) - print(f"{istartList=}") - print(f"{iendList=}") + # print(f"{istartList=}") + # print(f"{iendList=}") @@ -1826,8 +1826,8 @@ def lor(a, b, c): 
print(f"\t\tnumber of pairs founded: {n_computed_pairs}") print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") print(f"\t\tnumber of pairs notcovered yet: {len(notcoverd)}") - # print notcoverd in a file - np.savetxt("notcoverd_single.txt", notcoverd, fmt='%d') + # # print notcoverd in a file + # np.savetxt("notcoverd_single.txt", notcoverd, fmt='%d') else: @@ -1907,8 +1907,8 @@ def lor(a, b, c): print(f"\t\tnumber of pairs founded: {n_computed_pairs}") print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") print(f"\t\tnumber of pairs notcovered yet: {len(all_notcoverd)}") - # print notcoverd in a file - np.savetxt("notcoverd_multi.txt", notcoverd, fmt='%d') + # # print notcoverd in a file + # np.savetxt("notcoverd_multi.txt", notcoverd, fmt='%d') From ccae819b5a5b780eda3ace201fa94d7a73055845 Mon Sep 17 00:00:00 2001 From: Amin Pakzad Date: Mon, 28 Oct 2024 03:10:43 -0700 Subject: [PATCH 06/17] Create IntelSetup.py This file contains the flags for compiling ShakerMaker with intel compilers. --- IntelSetup.py | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 IntelSetup.py diff --git a/IntelSetup.py b/IntelSetup.py new file mode 100644 index 0000000..49fc8cf --- /dev/null +++ b/IntelSetup.py @@ -0,0 +1,154 @@ +from numpy.distutils.core import Extension, setup + + +name = "shakermaker" +version = "0.1" +release = "0.01" +author = "Jose A. 
Abell, Jorge Crempien D., and Matias Recabarren" + +srcdir = "shakermaker/core/" +srcs = [ + srcdir + 'core.pyf', + srcdir + "subgreen.f", + srcdir + "subgreen2.f", + srcdir + "subfocal.f", + srcdir + "subfk.f", + srcdir + "subtrav.f", + srcdir + "tau_p.f", + srcdir + "kernel.f", + srcdir + "prop.f", + srcdir + "source.f", + srcdir + "bessel.f", + srcdir + "haskell.f", + srcdir + "fft.c", + srcdir + "Complex.c" +] + + +# ext_modules = [ +# Extension ( +# name='shakermaker.core', +# sources=srcs, +# extra_f77_compile_args=["-ffixed-line-length-132", "-Wno-tabs", "-Wno-unused-dummy-argument", "-fPIC"], +# extra_compile_args=["-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION"] +# ) +# ] + + +ext_modules = [ + Extension ( + name='shakermaker.core', + sources=srcs, + extra_f77_compile_args=["-132", + "-w", + "-fPIC", + "-O3", + "-xHost", + "-qopenmp", + "-ipo" + ], + extra_compile_args=[ + "-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION", + "-xHost", # Optimize for the host machine's processor + "-O3", # Aggressive optimizations + "-qopenmp" # If you are using OpenMP, enables parallelization + ] + ) + ] + + + +''' + +ext_modules = [ + Extension( + name='shakermaker.core', + sources=srcs, + extra_f77_compile_args=[ + "-O1", # Safe optimization level: balances performance and stability + "-xHost", # Optimizes for the current machine's processor + "-qopenmp", # Enables parallelization using OpenMP (if the code supports it) + "-ipo", # Interprocedural optimization across multiple files for better performance + "-fPIC" # Ensures position-independent code for shared libraries + ], + extra_compile_args=[ + "-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION", # Ensures compatibility with older NumPy APIs + "-xHost", # Processor-specific optimizations for maximum performance + "-O1", # Safe optimization level + "-qopenmp" # Enables parallelization + ] + ) +] + +''' + + + + + + + +# ext_modules = [ +# Extension ( +# name='shakermaker.core', +# sources=srcs, +# extra_f77_compile_args=[ +# 
"-extend-source", # Equivalent to -ffixed-line-length-132 +# "-w", # Suppresses all warnings, including tabs and unused dummy arguments +# "-fPIC" # Position-independent code, same as with GCC +# ], +# extra_compile_args=[ +# "-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION", +# # "-xHost", # Optimize for the host machine's processor +# # "-O3", # Aggressive optimizations +# # "-qopenmp" # If you are using OpenMP, enables parallelization +# ] +# ) +# ] + + + + +cmdclass = {} +command_options = {} + +setup( + name = name, + package_dir = {'shakermaker': 'shakermaker'}, + packages = [ + "shakermaker", + "shakermaker.cm_library", + "shakermaker.sl_extensions", + "shakermaker.slw_extensions", + "shakermaker.stf_extensions", + "shakermaker.tools", + ], + ext_modules = ext_modules, + version = version, + description = "README.md", + author = author, + author_email = "info@joseabell.com", + url = "http://www.joseabell.com", + download_url = "tbd", + keywords = ["earthquake", "engineering", "drm", "simulation"], + classifiers = [ + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Fortran", + "Development Status :: Released", + "Environment :: Other Environment", + "Intended Audience :: Savvy Earthquake Engineers", + "License :: GPL", + "Operating System :: OS Independent", + "Topic :: TBD", + "Topic :: TBD2", + ], + long_description = """\ + shakermaker + ------------------------------------- + Create realistic seismograms! 
+ + """, + cmdclass = cmdclass, + command_options = command_options, +) From 6e4ca71ee92820a1b5a89c6ea40403a781f5e5c8 Mon Sep 17 00:00:00 2001 From: Amin Pakzad Date: Mon, 28 Oct 2024 22:18:43 -0700 Subject: [PATCH 07/17] Update shakermaker.py to create DRM points coordinates for visualization --- shakermaker/shakermaker.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/shakermaker/shakermaker.py b/shakermaker/shakermaker.py index 2dde715..15df05c 100644 --- a/shakermaker/shakermaker.py +++ b/shakermaker/shakermaker.py @@ -1566,6 +1566,7 @@ def gen_greens_function_database_pairs(self, using_vectorize_manner=False, cfactor = 0.5, finalcheck=True, + StationType="DRM", ): """Run the simulation. @@ -1670,6 +1671,19 @@ def lor(a, b, c): for i_station, station in enumerate(self._receivers): receiver_matrix[i_station, :] = station.x + + # creating DRM stations in csv file + if StationType.lower() == "drm": + if not os.path.dirname(store_here): # If the path is just a filename + mother_directory = os.getcwd() # Get the current working directory + else: + mother_directory = os.path.dirname(os.path.abspath(store_here)) + + np.savetxt(mother_directory + "/DRMPoints.csv", source_matrix, delimiter=",", header="x,y,z", comments="") + + + + z_rec = receiver_matrix[:,2] z_src = source_matrix[:,2] From dd0c8959fbae3e62524d292518a9757dd7bbf61c Mon Sep 17 00:00:00 2001 From: Amin Pakzad Date: Mon, 28 Oct 2024 22:20:41 -0700 Subject: [PATCH 08/17] adding import os, for generate DRMPoints coordinates --- shakermaker/shakermaker.py | 1 + 1 file changed, 1 insertion(+) diff --git a/shakermaker/shakermaker.py b/shakermaker/shakermaker.py index 15df05c..ed761cf 100644 --- a/shakermaker/shakermaker.py +++ b/shakermaker/shakermaker.py @@ -1674,6 +1674,7 @@ def lor(a, b, c): # creating DRM stations in csv file if StationType.lower() == "drm": + import os if not os.path.dirname(store_here): # If the path is just a filename mother_directory = os.getcwd() # Get the current 
working directory else: From dea6194b0b0ff503591e145f072cac6031c15748 Mon Sep 17 00:00:00 2001 From: Amin Pakzad Date: Mon, 28 Oct 2024 23:10:15 -0700 Subject: [PATCH 09/17] replacing source matrix by receiver matrix --- shakermaker/shakermaker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shakermaker/shakermaker.py b/shakermaker/shakermaker.py index ed761cf..d1c0048 100644 --- a/shakermaker/shakermaker.py +++ b/shakermaker/shakermaker.py @@ -1680,7 +1680,7 @@ def lor(a, b, c): else: mother_directory = os.path.dirname(os.path.abspath(store_here)) - np.savetxt(mother_directory + "/DRMPoints.csv", source_matrix, delimiter=",", header="x,y,z", comments="") + np.savetxt(mother_directory + "/DRMPoints.csv", receiver_matrix, delimiter=",", header="x,y,z", comments="") From a29c8296484cef970a8d4b66c6ca8cc91e423edb Mon Sep 17 00:00:00 2001 From: amnp95 Date: Thu, 7 Nov 2024 14:25:46 -0800 Subject: [PATCH 10/17] adding windows compiling instructions --- IntelWindowsSetup.py | 99 ++++++++++++++++++++++++++++++++++++++++++++ windows.bat | 17 ++++++++ 2 files changed, 116 insertions(+) create mode 100644 IntelWindowsSetup.py create mode 100644 windows.bat diff --git a/IntelWindowsSetup.py b/IntelWindowsSetup.py new file mode 100644 index 0000000..0ed5762 --- /dev/null +++ b/IntelWindowsSetup.py @@ -0,0 +1,99 @@ +import os +from numpy.distutils.core import Extension, setup + +# Project information +name = "shakermaker" +version = "0.1" +release = "0.01" +author = "Jose A. 
Abell, Jorge Crempien D., and Matias Recabarren" + +# Source files +srcdir = "shakermaker/core/" +srcs = [ + srcdir + 'core.pyf', + srcdir + "subgreen.f", + srcdir + "subgreen2.f", + srcdir + "subfocal.f", + srcdir + "subfk.f", + srcdir + "subtrav.f", + srcdir + "tau_p.f", + srcdir + "kernel.f", + srcdir + "prop.f", + srcdir + "source.f", + srcdir + "bessel.f", + srcdir + "haskell.f", + srcdir + "fft.c", + srcdir + "Complex.c" +] + +# Set Intel compilers for C and Fortran +os.environ['FC'] = 'ifx' # Intel Fortran Compiler +os.environ['CC'] = 'icx' # Intel C Compiler +os.environ['CXX'] = 'icx' # Intel C++ Compiler +os.environ['F77'] = 'ifx' # Set Fortran 77 compiler + + + +# Compiler flags for Intel compilers +extra_f77_compile_args = [ + "/extend-source:132", # For Fortran line length compatibility + "/Qparallel", # Parallel compilation + "/O1", # Optimization level 3 + "/Qopenmp", # OpenMP support +] + +extra_compile_args = [ + "/DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION", + "/QxHost", # Optimize for host machine's processor + "/O1", # Aggressive optimization + "/Qopenmp", # Enable OpenMP for parallel processing +] + +# Define extension modules +ext_modules = [ + Extension( + name='shakermaker.core', + sources=srcs, + extra_f77_compile_args=extra_f77_compile_args, + extra_compile_args=extra_compile_args, + include_dirs=["D:/Programs/Intel/oneAPI/include"], # Adjust to your include path + library_dirs=["D:/Programs/Intel/oneAPI/lib"] # Adjust to your library path + ) +] + +# Define setup configuration +setup( + name=name, + package_dir={'shakermaker': 'shakermaker'}, + packages=[ + "shakermaker", + "shakermaker.cm_library", + "shakermaker.sl_extensions", + "shakermaker.slw_extensions", + "shakermaker.stf_extensions", + "shakermaker.tools", + ], + ext_modules=ext_modules, + version=version, + description="README.md", + author=author, + author_email="info@joseabell.com", + url="http://www.joseabell.com", + download_url="tbd", + keywords=["earthquake", "engineering", 
"drm", "simulation"], + classifiers=[ + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Fortran", + "Development Status :: 4 - Beta", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: GNU General Public License (GPL)", + "Operating System :: OS Independent", + "Topic :: Scientific/Engineering", + ], + long_description=""" + shakermaker + ------------------------------------- + Create realistic seismograms! + """, +) diff --git a/windows.bat b/windows.bat new file mode 100644 index 0000000..b9f1649 --- /dev/null +++ b/windows.bat @@ -0,0 +1,17 @@ +python -m venv myenv +call .\myenv\Scripts\activate +call pip install setuptools +call pip install numpy==1.23.5 +call pip install scipy==1.11.4 +call pip install matplotlib +call pip install geopy +call pip install pyproj +call pip install tqdm +call pip install h5py +call pip install mpi4py + +@REM replace the path with your own path to the Intel oneAPI installation +call "D:\Programs\Intel\oneAPI\setvars.bat" +python IntelWindowsSetup.py build +python IntelWindowsSetup.py install + From d23f071e4e30fccd38e912caa8af739dcd6a5f98 Mon Sep 17 00:00:00 2001 From: Amin Pakzad Date: Mon, 28 Apr 2025 11:34:56 -0700 Subject: [PATCH 11/17] Create Stampede3Setup.py --- Stampede3Setup.py | 88 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 Stampede3Setup.py diff --git a/Stampede3Setup.py b/Stampede3Setup.py new file mode 100644 index 0000000..ac46aac --- /dev/null +++ b/Stampede3Setup.py @@ -0,0 +1,88 @@ +from numpy.distutils.core import Extension, setup + + +name = "shakermaker" +version = "0.1" +release = "0.01" +author = "Jose A. 
Abell, Jorge Crempien D., and Matias Recabarren" + +srcdir = "shakermaker/core/" +srcs = [ + srcdir + 'core.pyf', + srcdir + "subgreen.f", + srcdir + "subgreen2.f", + srcdir + "subfocal.f", + srcdir + "subfk.f", + srcdir + "subtrav.f", + srcdir + "tau_p.f", + srcdir + "kernel.f", + srcdir + "prop.f", + srcdir + "source.f", + srcdir + "bessel.f", + srcdir + "haskell.f", + srcdir + "fft.c", + srcdir + "Complex.c" +] + +# Minimal flags for compatibility +ext_modules = [ + Extension( + name='shakermaker.core', + sources=srcs, + extra_f77_compile_args=[ + "-132", # Fixed format 132 character line length + "-w", # Disable warnings + "-fPIC", # Position Independent Code + "-O2", # Moderate optimization level + ], + extra_compile_args=[ + "-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION", + "-O2", # Moderate optimization level + "-fPIC" # Position Independent Code + ] + ) +] + +cmdclass = {} +command_options = {} + +setup( + name=name, + package_dir={'shakermaker': 'shakermaker'}, + packages=[ + "shakermaker", + "shakermaker.cm_library", + "shakermaker.sl_extensions", + "shakermaker.slw_extensions", + "shakermaker.stf_extensions", + "shakermaker.tools", + ], + ext_modules=ext_modules, + version=version, + description="README.md", + author=author, + author_email="info@joseabell.com", + url="http://www.joseabell.com", + download_url="tbd", + keywords=["earthquake", "engineering", "drm", "simulation"], + classifiers=[ + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Fortran", + "Development Status :: Released", + "Environment :: Other Environment", + "Intended Audience :: Savvy Earthquake Engineers", + "License :: GPL", + "Operating System :: OS Independent", + "Topic :: TBD", + "Topic :: TBD2", + ], + long_description="""\ + shakermaker + ------------------------------------- + Create realistic seismograms! 
+ + """, + cmdclass=cmdclass, + command_options=command_options, +) From 854dc82da5fdcd9dad7a9fbbd636b409acf8f0a1 Mon Sep 17 00:00:00 2001 From: amnp95 Date: Mon, 28 Apr 2025 11:50:19 -0700 Subject: [PATCH 12/17] organizing setup files for different systems --- IntelSetup.py => tacc/frontera.py | 0 Stampede3Setup.py => tacc/stampede3.py | 0 windows.bat => tacc/windows.bat | 0 IntelWindowsSetup.py => tacc/windows.py | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename IntelSetup.py => tacc/frontera.py (100%) rename Stampede3Setup.py => tacc/stampede3.py (100%) rename windows.bat => tacc/windows.bat (100%) rename IntelWindowsSetup.py => tacc/windows.py (100%) diff --git a/IntelSetup.py b/tacc/frontera.py similarity index 100% rename from IntelSetup.py rename to tacc/frontera.py diff --git a/Stampede3Setup.py b/tacc/stampede3.py similarity index 100% rename from Stampede3Setup.py rename to tacc/stampede3.py diff --git a/windows.bat b/tacc/windows.bat similarity index 100% rename from windows.bat rename to tacc/windows.bat diff --git a/IntelWindowsSetup.py b/tacc/windows.py similarity index 100% rename from IntelWindowsSetup.py rename to tacc/windows.py From 6b18a255ac2c1c74d47eff8f924f624252864851 Mon Sep 17 00:00:00 2001 From: amnp95 Date: Mon, 28 Apr 2025 11:55:13 -0700 Subject: [PATCH 13/17] add stampede3.sh script for building setup --- tacc/stampede3.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 tacc/stampede3.sh diff --git a/tacc/stampede3.sh b/tacc/stampede3.sh new file mode 100644 index 0000000..897f102 --- /dev/null +++ b/tacc/stampede3.sh @@ -0,0 +1,12 @@ +clear +ml phdf5 +rm -rf build + +export CC=icx +export CXX=icpx +export FC=ifx +export F77=ifx +export F90=ifx + +python3 IntelSetup.py build +# python3 IntelSetup.py install \ No newline at end of file From 4925893b5354475fae32232a8d8df752efa52bc6 Mon Sep 17 00:00:00 2001 From: amnp95 Date: Mon, 28 Apr 2025 11:56:01 -0700 Subject: [PATCH 14/17] update stampede3.sh to use 
stampede3.py for build process --- tacc/stampede3.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tacc/stampede3.sh b/tacc/stampede3.sh index 897f102..4a69195 100644 --- a/tacc/stampede3.sh +++ b/tacc/stampede3.sh @@ -8,5 +8,5 @@ export FC=ifx export F77=ifx export F90=ifx -python3 IntelSetup.py build -# python3 IntelSetup.py install \ No newline at end of file +python3 stampede3.py build +# python3 stampede3.py install \ No newline at end of file From 4f4d5b256351bdb47e0d6f4f5a7b2cca1d122054 Mon Sep 17 00:00:00 2001 From: amnp95 Date: Mon, 28 Apr 2025 12:11:21 -0700 Subject: [PATCH 15/17] refactor stampede3.sh to improve formatting and organization of dependency installation --- tacc/stampede3.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tacc/stampede3.sh b/tacc/stampede3.sh index 4a69195..246db4a 100644 --- a/tacc/stampede3.sh +++ b/tacc/stampede3.sh @@ -2,11 +2,18 @@ clear ml phdf5 rm -rf build +# build an environment and activate it (not necessary, but recommended) + +# installing the dependencies +pip install numpy==1.23.0 scipy==1.11.1 matplotlib geopy pyproj tqdm + + export CC=icx export CXX=icpx export FC=ifx export F77=ifx export F90=ifx + python3 stampede3.py build # python3 stampede3.py install \ No newline at end of file From 42be0ecc56b548448684cb620cb7890b3a4bb604 Mon Sep 17 00:00:00 2001 From: amnp95 Date: Mon, 28 Apr 2025 12:40:07 -0700 Subject: [PATCH 16/17] enhance stampede3.sh by adding missing dependencies and improving environment setup --- tacc/stampede3.sh | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/tacc/stampede3.sh b/tacc/stampede3.sh index 246db4a..01fda00 100644 --- a/tacc/stampede3.sh +++ b/tacc/stampede3.sh @@ -1,19 +1,46 @@ +# building environment for stampede3 should be done in an idev mode +# idev + +# after the idev command, you will be in a new shell clear ml phdf5 -rm -rf build +ml python/3.9.18 + # build an environment and activate 
it (not necessary, but recommended) +ENVNAME=ShakerMaker_env +# ENVNAME=ShakerMakerTestingEnv + +python3 -m venv $ENVNAME +source $ENVNAME/bin/activate + + # installing the dependencies -pip install numpy==1.23.0 scipy==1.11.1 matplotlib geopy pyproj tqdm +pip install numpy==1.23.0 scipy==1.11.1 matplotlib geopy pyproj tqdm h5py mpi4py + + +# install the ShakerMaker package +# clone the repository +git clone https://github.com/amnp95/ShakerMaker.git +cd ShakerMaker +cp tacc/stampede3.py . +# set the environment variables for the Intel compiler export CC=icx export CXX=icpx export FC=ifx export F77=ifx export F90=ifx - +# build the package python3 stampede3.py build -# python3 stampede3.py install \ No newline at end of file +python3 stampede3.py install + + + +# pip list : you should see the ShakerMaker package in the list of installed packages +pip list + + From 585a866b08ae4596582b37d09edfd8fd970d1f56 Mon Sep 17 00:00:00 2001 From: amnp95 Date: Mon, 28 Apr 2025 12:44:17 -0700 Subject: [PATCH 17/17] fix stampede3.sh by adding missing cd command to return to the previous directory --- tacc/stampede3.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tacc/stampede3.sh b/tacc/stampede3.sh index 01fda00..d20e7fa 100644 --- a/tacc/stampede3.sh +++ b/tacc/stampede3.sh @@ -42,5 +42,6 @@ python3 stampede3.py install # pip list : you should see the ShakerMaker package in the list of installed packages pip list +cd ..