From a8faee9b782fe3c248378a82d234464449eea80e Mon Sep 17 00:00:00 2001 From: amnp95 Date: Tue, 10 Sep 2024 18:53:25 -0700 Subject: [PATCH 01/17] Vectorizing the finding of pairs in a single processor --- shakermaker/shakermaker.py | 668 ++++++++++++++++++++++++++++++++----- 1 file changed, 583 insertions(+), 85 deletions(-) diff --git a/shakermaker/shakermaker.py b/shakermaker/shakermaker.py index 8dbe7e5..9574594 100644 --- a/shakermaker/shakermaker.py +++ b/shakermaker/shakermaker.py @@ -9,6 +9,7 @@ import imp import traceback from time import perf_counter +from tqdm import tqdm try: @@ -1561,6 +1562,9 @@ def gen_greens_function_database_pairs(self, showProgress=True, store_here=None, npairs_max=100000, + using_vectorize_manner=False, + cfactor = 0.5, + longer_version=True ): """Run the simulation. @@ -1610,109 +1614,603 @@ def gen_greens_function_database_pairs(self, .format(self._source.nsources, self._receivers.nstations, self._source.nsources*self._receivers.nstations, dt, nfft)) - ipair = 0 - - npairs = self._receivers.nstations*len(self._source._pslist) - - print(f"{npairs=}") + if rank == 0: + print("SahkerMaker.run - starting") + print(f"Number of sources: {self._source.nsources}") + print(f"Number of receivers: {self._receivers.nstations}") + print(f"Total src-rcv pairs: {self._source.nsources*self._receivers.nstations}") + print(f"{dt=}") + print(f"{nfft=}") - # dists = np.zeros((npairs, 2)) - pairs_to_compute = np.empty((npairs_max, 2), dtype=np.int32) - dd_of_pairs = np.empty(npairs_max, dtype=np.double) - dh_of_pairs = np.empty(npairs_max, dtype=np.double) - dv_of_pairs = np.empty(npairs_max, dtype=np.double) - zrec_of_pairs = np.empty(npairs_max, dtype=np.double) - zsrc_of_pairs = np.empty(npairs_max, dtype=np.double) + factor = cfactor + - # Initialize the counter for the number of computed pairs. 
- n_computed_pairs = 0 + - # def lor(a,b,c): - # return np.logical_or(a,np.logical_or(b,c)) def lor(a, b, c): return a | b | c + # wait for all ranks to reach here - for i_station, station in enumerate(self._receivers): + toatalnumberofpairs = self._receivers.nstations*self._source.nsources + # parst the indcies from zero to totalnumberofpairs into nprocs parts + # and assign each part to a rank + # the rank 0 will have the first part, rank 1 will have the second part and so on + istart = rank * toatalnumberofpairs // nprocs + iend = (rank + 1) * toatalnumberofpairs // nprocs + if rank == 0: + istartList = [] + iendList = [] + for i in range(nprocs): + istartList.append(i * toatalnumberofpairs // nprocs) + iendList.append((i + 1) * toatalnumberofpairs // nprocs) + print(f"{istartList=}") + print(f"{iendList=}") + + + + + + if rank == 0: + # start timing + perf_time_begin = perf_counter() + npairs = self._receivers.nstations*self._source.nsources + print(f"{npairs=}") + pairs_to_compute = np.empty((npairs, 2), dtype=np.int32) + + print ("Creating source matrix") + source_matrix = np.empty((self._source.nsources, 3), dtype=np.double) for i_psource, psource in enumerate(self._source): + source_matrix[i_psource, :] = psource.x + + print ("Creating receiver matrix") + receiver_matrix = np.empty((self._receivers.nstations, 3), dtype=np.double) + for i_station, station in enumerate(self._receivers): + receiver_matrix[i_station, :] = station.x + + z_rec = receiver_matrix[:,2] + z_src = source_matrix[:,2] + + # compute the distance matrix + print ("Computing the distance matrix") + dd = np.linalg.norm(receiver_matrix[:, np.newaxis, :] - source_matrix[np.newaxis, :, :], axis=2) + dd = dd.flatten() + + # compute the horizontal distance matrix + print ("Computing the horizontal distance matrix") + receiver_matrix = receiver_matrix[:,:2] + source_matrix = source_matrix[:,:2] + dh = np.linalg.norm(receiver_matrix[:, np.newaxis, :] - source_matrix[np.newaxis, :, :], axis=2) + dh 
= dh.flatten() + + + # compute the vertical distance matrix + print ("Computing the vertical distance matrix") + z_rec, z_src = np.meshgrid(z_rec, z_src, indexing='ij') + z_rec = z_rec.flatten() + z_src = z_src.flatten() + dv = np.abs(z_rec - z_src) + + comm.Barrier() + if nprocs > 1: + # create dh z_rec and z_src for all ranks + mini_dh = np.empty(iend-istart, dtype=np.double) + mini_z_rec = np.empty(iend-istart, dtype=np.double) + mini_z_src = np.empty(iend-istart, dtype=np.double) + if rank == 0: + # send sub dh, z_rec and z_src to all ranks + for i in range(1, nprocs): + istart = istartList[i] + iend = iendList[i] + comm.Send(dh[istart:iend], dest=i, tag=1) + comm.Send(z_rec[istart:iend], dest=i, tag=2) + comm.Send(z_src[istart:iend], dest=i, tag=3) + istart = istartList[0] + iend = iendList[0] + else: + comm.Recv(mini_dh, source=0, tag=1) + comm.Recv(mini_z_rec, source=0, tag=2) + comm.Recv(mini_z_src, source=0, tag=3) + elif nprocs == 1: + print("running on one processor") + istart = 0 + iend = toatalnumberofpairs + + comm.Barrier() + + + if using_vectorize_manner==True and longer_version==False: + if rank == 0: + # Creating bins + print ("Computing the bins") + eps = 1e-8 + dh_max , dh_min = np.max(dh), np.min(dh) + dh_bins = np.arange(dh_min, dh_max + eps, delta_h*factor) + + dzrec_max , dzrec_min = np.max(z_rec), np.min(z_rec) + dzrec_bins = np.arange(dzrec_min, dzrec_max + eps, delta_v_rec*factor) + dzsrc_max , dzsrc_min = np.max(z_src), np.min(z_src) + dzsrc_bins = np.arange(dzsrc_min, dzsrc_max + eps, delta_v_src*factor) - if n_computed_pairs >= npairs_max: - print("Exceeded number of pairs!!") - exit(0) + # computing total number of bins + num_bins = len(dh_bins) * len(dzrec_bins) * len(dzsrc_bins) + print(f"Total number of bins: {num_bins}") - t1 = perf_counter() + # digitizing the data + print ("Digitizing the data") + dh_digitized = np.digitize(dh, dh_bins) + z_rec_digitized = np.digitize(z_rec, dzrec_bins) + z_src_digitized = np.digitize(z_src, 
dzsrc_bins) - x_src = psource.x - x_rec = station.x - z_rec = station.x[2] - z_src = psource.x[2] - - d = x_rec - x_src - - dd = np.linalg.norm(d) - dh = np.linalg.norm(d[0:2]) - dv = np.abs(d[2]) - - # dists[ipair,0] = dh - # dists[ipair,1] = dv - - condition = lor(np.abs(dh - dh_of_pairs[:n_computed_pairs]) > delta_h, \ - np.abs(z_src - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ - np.abs(z_rec - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec) - - # condition = (np.abs(dd - dd_of_pairs[:n_computed_pairs]) < delta_h) & \ - # (np.abs(z_rec - zrec_of_pairs[:n_computed_pairs]) < delta_v) - # condition = (np.abs(dh - dh_of_pairs[:n_computed_pairs]) < delta_h) & \ - # (np.abs(z_src - zsrc_of_pairs[:n_computed_pairs]) < delta_v_src) &\ - # (np.abs(z_rec - zrec_of_pairs[:n_computed_pairs]) < delta_v_rec) - # condition = (np.abs(dh - dh_of_pairs[:n_computed_pairs]) < delta_h) & \ - # (np.abs(z_rec - zrec_of_pairs[:n_computed_pairs]) < delta_v) & \ - # (np.abs(z_src - zsrc_of_pairs[:n_computed_pairs]) < delta_v) - # (np.abs(dv - dv_of_pairs[:n_computed_pairs]) < delta_v) & \ - - if n_computed_pairs == 0 or np.all(condition): - pairs_to_compute[n_computed_pairs,:] = [i_station, i_psource] - dd_of_pairs[n_computed_pairs] = dd - dh_of_pairs[n_computed_pairs] = dh - dv_of_pairs[n_computed_pairs] = dv - zrec_of_pairs[n_computed_pairs] = z_rec - zsrc_of_pairs[n_computed_pairs] = z_src + + + # creat matrix of pairs + print ("Creating the pairs") + dh_digitized -= 1 + z_rec_digitized -= 1 + z_src_digitized -= 1 + # pairs_to_compute =[categories[dh_digitized[i], z_rec_digitized[i], z_src_digitized[i]] for i in range(npairs)] + pairs_to_compute = dh_digitized * len(dzrec_bins) * len(dzsrc_bins) + z_rec_digitized * len(dzsrc_bins) + z_src_digitized + pairs_to_compute, indices,reverseIndicies = np.unique(pairs_to_compute, return_index=True,return_inverse=True) + indices = np.sort(indices) + i_station = indices // len(self._source._pslist) + i_psource = indices % 
len(self._source._pslist) + + + + + + + + npairs_max = len(pairs_to_compute) + pairs_to_compute = np.empty((npairs_max, 2), dtype=np.int32) + dd_of_pairs = np.empty(npairs_max, dtype=np.double) + dh_of_pairs = np.empty(npairs_max, dtype=np.double) + dv_of_pairs = np.empty(npairs_max, dtype=np.double) + zrec_of_pairs = np.empty(npairs_max, dtype=np.double) + zsrc_of_pairs = np.empty(npairs_max, dtype=np.double) + + print(f"npairs_max = {npairs_max}") + # ==================================================================================================== + # optimizing the number of pairs + # ==================================================================================================== + + + + n_computed_pairs = 0 + print ("Optimizing the number of pairs") + print("\titeration 1") + print("\t\treducing the number of pairs:") + + # reducing the number of pairs + + for j in tqdm(range(len(indices))): + i = indices[j] + condition = lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec) + + if n_computed_pairs == 0 or np.all(condition): + pairs_to_compute[n_computed_pairs,:] = [i_station[j], i_psource[j]] + dd_of_pairs[n_computed_pairs] = dd[i] + dh_of_pairs[n_computed_pairs] = dh[i] + dv_of_pairs[n_computed_pairs] = dv[i] + zrec_of_pairs[n_computed_pairs] = z_rec[i] + zsrc_of_pairs[n_computed_pairs] = z_src[i] + + n_computed_pairs += 1 + + + # if j % 10000 == 0 or j == len(indices) - 1: + # print(f"\t\t\tOn {j=} of {npairs_max} {n_computed_pairs} ({n_computed_pairs/npairs*100}% reduction)") + + # wait for all ranks to reach here + comm.Barrier() + if nprocs ==1 : + # now check that the founded pairs cover all the points + notcoverd = [] + for i in range(len(dh)): + if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + 
np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): + notcoverd.append(i) + + # print(len(notcoverd)) + print(f"\t\tnumber of pairs founded: {n_computed_pairs}") + print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") + print(f"\t\tnumber of pairs notcovered yet: {len(notcoverd)}") + else: + + + if rank == 0: + # On rank 0, calculate or set the value of n_computed_pairs + n_computed_pairs = np.array(n_computed_pairs, dtype='i') # Example value + else: + # On other ranks, create an empty array to receive the value + n_computed_pairs = np.empty(1, dtype='i') + + # Broadcast the value from rank 0 to all other ranks + comm.Bcast(n_computed_pairs, root=0) + + # Extract the integer from the NumPy array + # check that if the array is zero dimensional + n_computed_pairs = n_computed_pairs.item() + + + + comm.Barrier() + + if rank > 0: + dh_of_pairs = np.empty(n_computed_pairs, dtype=np.double) + zrec_of_pairs = np.empty(n_computed_pairs, dtype=np.double) + zsrc_of_pairs = np.empty(n_computed_pairs, dtype=np.double) + + comm.Barrier() + # send dh_of_pairs, dv_of_pairs, zrec_of_pairs, zsrc_of_pairs to all ranks + if rank == 0: + for i in range(1, nprocs): + comm.Send(dh_of_pairs[:n_computed_pairs], dest=i, tag=5) + comm.Send(zrec_of_pairs[:n_computed_pairs], dest=i, tag=6) + comm.Send(zsrc_of_pairs[:n_computed_pairs], dest=i, tag=7) + else: + comm.Recv(dh_of_pairs, source=0, tag=5) + comm.Recv(zrec_of_pairs, source=0, tag=6) + comm.Recv(zsrc_of_pairs, source=0, tag=7) + comm.Barrier() + # now found uncovered points for each rank + notcoverd = [] + if rank == 0: + # measure the percentage of the cheking + print(f"\t\tchecking the uncovered points") + for i in tqdm(range(iend)): + if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): + notcoverd.append(i) + + else: + for i in 
range(len(mini_dh)): + if np.all(lor(np.abs(mini_dh[i] - dh_of_pairs) > delta_h, \ + np.abs(mini_z_src[i] - zsrc_of_pairs) > delta_v_src, \ + np.abs(mini_z_rec[i] - zrec_of_pairs) > delta_v_rec)): + notcoverd.append(i) + + # if rank == 0: + # print(f"{rank=} {notcoverd=}") + notcoverd = np.array(notcoverd) + istart + + comm.Barrier() + # print(f"{rank=} {len(notcoverd)=}") + # Gather the sizes of the notcoverd arrays from all processes + notcoverd_sizes = comm.gather(len(notcoverd), root=0) + + if rank == 0: + # Calculate the total size and create a buffer to hold all data + total_size = sum(notcoverd_sizes) + all_notcoverd = np.empty(total_size, dtype=int) + else: + all_notcoverd = None + + # Gather all notcoverd arrays to the root process + comm.Gatherv(sendbuf=notcoverd, recvbuf=(all_notcoverd, notcoverd_sizes), root=0) + notcoverd = all_notcoverd + if rank == 0: + print(f"\t\tnumber of pairs founded: {n_computed_pairs}") + print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") + print(f"\t\tnumber of pairs notcovered yet: {len(all_notcoverd)}") + + + + + + + # wait for all ranks to reach here + comm.Barrier() + if rank == 0: + + iteration = 2 + while len(notcoverd) > 0: + print(f"\titeration {iteration}") + dh_copy = dh[notcoverd] + z_rec_copy = z_rec[notcoverd] + z_src_copy = z_src[notcoverd] + indexes = np.array(notcoverd) + + # digitizing the data + dh_digitized = np.digitize(dh[notcoverd], dh_bins) + z_rec_digitized = np.digitize(z_rec[notcoverd], dzrec_bins) + z_src_digitized = np.digitize(z_src[notcoverd], dzsrc_bins) + + dh_digitized -= 1 + z_rec_digitized -= 1 + z_src_digitized -= 1 + + additional_pairs = dh_digitized * len(dzrec_bins) * len(dzsrc_bins) + z_rec_digitized * len(dzsrc_bins) + z_src_digitized + additional_pairs, indices = np.unique(additional_pairs, return_index=True) + indices = np.sort(indices) + indices = indexes[indices] + + + + print(f"\t\treducing the number of pairs:") + for j in tqdm(range(len(indices))): + i = 
indices[j] + condition = lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec) + if np.all(condition): + istation = i // len(self._source._pslist) + ipsource = i % len(self._source._pslist) + pairs_to_compute[n_computed_pairs,:] = [istation, ipsource] + dd_of_pairs[n_computed_pairs] = dd[i] + dh_of_pairs[n_computed_pairs] = dh[i] + dv_of_pairs[n_computed_pairs] = dv[i] + zrec_of_pairs[n_computed_pairs] = z_rec[i] + zsrc_of_pairs[n_computed_pairs] = z_src[i] + n_computed_pairs += 1 + # if (j+1) % 1000 == 0: + # print(f"\t\t\tOn {j=} of {len(indices)} {n_computed_pairs} ({n_computed_pairs/npairs*100}% reduction)") + + notcoverd = [] + print(f"\t\tchecking the uncovered points") + for j in tqdm(range(len(indexes))): + i = indexes[j] + if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): + notcoverd.append(i) + print(f"\t\tnumber of pairs founded: {n_computed_pairs}") + print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") + iteration += 1 + + + + print("Optimization done") + print(f"Number of pairs founded: {n_computed_pairs}") + print(f"Number of pairs skipped: {npairs_max - n_computed_pairs}") + print(f"Number of points not covered yet: {len(notcoverd)}") + # print timing + perf_time_end = perf_counter() + time = perf_time_end - perf_time_begin + # conver time to hours, minutes and seconds + print(f" Time: hours: {time//3600} minutes: {(time%3600)//60} seconds: {time%60}") + + + # if rank == 0: + # print("final check") + # # checking the not covered points + # notcoverd = [] + # for i in range(len(dh)): + # if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + # np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > 
delta_v_src, \ + # np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): + # notcoverd.append(i) + # # print(f"Error: {i} not found in the pairs notcoverd={notcoverd}") + + # print(f"\t\tnumber of pairs founded: {n_computed_pairs}") + # print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") + + comm.Barrier() + + if using_vectorize_manner==True and longer_version==True: + print("Using vectorize manner and longer version") + if rank == 0: + # make the factor 0.99 to avoid the error in the digitize function + factor = 0.98 + # Creating bins + print ("Computing the bins") + eps = 1e-8 + dh_max , dh_min = np.max(dh), np.min(dh) + # dh_bins = np.arange(dh_min, dh_max + eps, delta_h*factor) + num = (dh_max - dh_min) / (delta_h*factor) + # celing the number of bins + num = np.ceil(num) + num = int(num) + # bins should include the maximum value + dh_bins = np.linspace(dh_min-eps, dh_max+eps, num+1) + + + dzrec_max , dzrec_min = np.max(z_rec), np.min(z_rec) + num = (dzrec_max - dzrec_min) / (delta_v_rec*factor) + # celing the number of bins + num = np.ceil(num) + num = int(num) + # bins should include the maximum value + dzrec_bins = np.linspace(dzrec_min-eps, dzrec_max+eps, num+1) + + + dzsrc_max , dzsrc_min = np.max(z_src), np.min(z_src) + num = (dzsrc_max - dzsrc_min) / (delta_v_src*factor) + # celing the number of bins + num = np.ceil(num) + num = int(num) + # bins should include the maximum value + dzsrc_bins = np.linspace(dzsrc_min-eps, dzsrc_max+eps, num+1) + + + # verify that the bins differ by delta_h, delta_v_rec and delta_v_src + assert np.all(np.diff(dh_bins) - delta_h < 1e-8) + assert np.all(np.diff(dzrec_bins) - delta_v_rec < 1e-8) + assert np.all(np.diff(dzsrc_bins) - delta_v_src < 1e-8) + + + # computing total number of bins + num_bins = len(dh_bins) * len(dzrec_bins) * len(dzsrc_bins) + print(f"Total number of bins: {num_bins}") + + # digitizing the data + print ("Digitizing the data") + dh_digitized = np.digitize(dh, dh_bins) + 
z_rec_digitized = np.digitize(z_rec, dzrec_bins) + z_src_digitized = np.digitize(z_src, dzsrc_bins) + + + + + # creat matrix of pairs + print ("Creating the pairs") + dh_digitized -= 1 + z_rec_digitized -= 1 + z_src_digitized -= 1 + # pairs_to_compute =[categories[dh_digitized[i], z_rec_digitized[i], z_src_digitized[i]] for i in range(npairs)] + pairs_to_compute = dh_digitized * len(dzrec_bins) * len(dzsrc_bins) + z_rec_digitized * len(dzsrc_bins) + z_src_digitized + pairs_to_compute, indices,reverseIndicies = np.unique(pairs_to_compute, return_index=True,return_inverse=True) + indices = np.sort(indices) + i_station = indices // len(self._source._pslist) + i_psource = indices % len(self._source._pslist) + + + + + + + + npairs_max = len(pairs_to_compute) + pairs_to_compute = np.empty((npairs_max, 2), dtype=np.int32) + dd_of_pairs = np.empty(npairs_max, dtype=np.double) + dh_of_pairs = np.empty(npairs_max, dtype=np.double) + dv_of_pairs = np.empty(npairs_max, dtype=np.double) + zrec_of_pairs = np.empty(npairs_max, dtype=np.double) + zsrc_of_pairs = np.empty(npairs_max, dtype=np.double) + + print(f"npairs_max = {npairs_max}") + # ==================================================================================================== + # optimizing the number of pairs + # ==================================================================================================== + + + + n_computed_pairs = 0 + print ("Optimizing the number of pairs") + print("\titeration 1") + print("\t\treducing the number of pairs:") + + # reducing the number of pairs + + for j in tqdm(range(len(indices))): + i = indices[j] + pairs_to_compute[n_computed_pairs,:] = [i_station[j], i_psource[j]] + dd_of_pairs[n_computed_pairs] = dd[i] + dh_of_pairs[n_computed_pairs] = dh[i] + dv_of_pairs[n_computed_pairs] = dv[i] + zrec_of_pairs[n_computed_pairs] = z_rec[i] + zsrc_of_pairs[n_computed_pairs] = z_src[i] n_computed_pairs += 1 - t2 = perf_counter() - if ipair % 10000 == 0: - ETA = 
(t2-t1)*(npairs-ipair)/3600. - print(f"On {ipair=} of {npairs} {n_computed_pairs=} ({n_computed_pairs/npairs*100}% reduction) {ETA=}h") + if using_vectorize_manner==False: + srart = perf_counter() - ipair += 1 - - - pairs_to_compute = pairs_to_compute[:n_computed_pairs,:] - dd_of_pairs = dd_of_pairs[:n_computed_pairs] - dh_of_pairs = dh_of_pairs[:n_computed_pairs] - dv_of_pairs = dv_of_pairs[:n_computed_pairs] - zrec_of_pairs = zrec_of_pairs[:n_computed_pairs] - zsrc_of_pairs = zsrc_of_pairs[:n_computed_pairs] - - print(f"Need only {n_computed_pairs} pairs of {npairs} ({n_computed_pairs/npairs*100}% reduction)") - - if store_here is not None: - import h5py - with h5py.File(store_here + '.h5', 'w') as hf: - # hf.create_dataset("dists", data=dists) - hf.create_dataset("pairs_to_compute", data=pairs_to_compute) - hf.create_dataset("dd_of_pairs", data=dd_of_pairs) - hf.create_dataset("dh_of_pairs", data=dh_of_pairs) - hf.create_dataset("dv_of_pairs", data=dv_of_pairs) - hf.create_dataset("zrec_of_pairs", data=zrec_of_pairs) - hf.create_dataset("zsrc_of_pairs", data=zsrc_of_pairs) - hf.create_dataset("delta_h", data=delta_h) - hf.create_dataset("delta_v_rec", data=delta_v_rec) - hf.create_dataset("delta_v_src", data=delta_v_src) + ipair = 0 + + npairs = self._receivers.nstations*len(self._source._pslist) + + print(f"{npairs=}") + + # dists = np.zeros((npairs, 2)) + + pairs_to_compute = np.empty((npairs_max, 2), dtype=np.int32) + dd_of_pairs = np.empty(npairs_max, dtype=np.double) + dh_of_pairs = np.empty(npairs_max, dtype=np.double) + dv_of_pairs = np.empty(npairs_max, dtype=np.double) + zrec_of_pairs = np.empty(npairs_max, dtype=np.double) + zsrc_of_pairs = np.empty(npairs_max, dtype=np.double) + + # Initialize the counter for the number of computed pairs. 
+ n_computed_pairs = 0 + + # def lor(a,b,c): + # return np.logical_or(a,np.logical_or(b,c)) + + + + for i_station, station in enumerate(self._receivers): + for i_psource, psource in enumerate(self._source): + + + if n_computed_pairs >= npairs_max: + print("Exceeded number of pairs!!") + exit(0) + + t1 = perf_counter() + + x_src = psource.x + x_rec = station.x + + z_rec = station.x[2] + z_src = psource.x[2] + + d = x_rec - x_src + + dd = np.linalg.norm(d) + dh = np.linalg.norm(d[0:2]) + dv = np.abs(d[2]) + + # dists[ipair,0] = dh + # dists[ipair,1] = dv + + condition = lor(np.abs(dh - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + np.abs(z_src - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + np.abs(z_rec - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec) + + # condition = (np.abs(dd - dd_of_pairs[:n_computed_pairs]) < delta_h) & \ + # (np.abs(z_rec - zrec_of_pairs[:n_computed_pairs]) < delta_v) + # condition = (np.abs(dh - dh_of_pairs[:n_computed_pairs]) < delta_h) & \ + # (np.abs(z_src - zsrc_of_pairs[:n_computed_pairs]) < delta_v_src) &\ + # (np.abs(z_rec - zrec_of_pairs[:n_computed_pairs]) < delta_v_rec) + # condition = (np.abs(dh - dh_of_pairs[:n_computed_pairs]) < delta_h) & \ + # (np.abs(z_rec - zrec_of_pairs[:n_computed_pairs]) < delta_v) & \ + # (np.abs(z_src - zsrc_of_pairs[:n_computed_pairs]) < delta_v) + # (np.abs(dv - dv_of_pairs[:n_computed_pairs]) < delta_v) & \ + + if n_computed_pairs == 0 or np.all(condition): + pairs_to_compute[n_computed_pairs,:] = [i_station, i_psource] + dd_of_pairs[n_computed_pairs] = dd + dh_of_pairs[n_computed_pairs] = dh + dv_of_pairs[n_computed_pairs] = dv + zrec_of_pairs[n_computed_pairs] = z_rec + zsrc_of_pairs[n_computed_pairs] = z_src + + n_computed_pairs += 1 + # print on normal file with 4 percision + t2 = perf_counter() + + if ipair % 10000 == 0: + ETA = (t2-t1)*(npairs-ipair)/3600. 
+ print(f"On {ipair=} of {npairs} {n_computed_pairs=} ({n_computed_pairs/npairs*100}% reduction) {ETA=}h") + + ipair += 1 + end = perf_counter() + + print(f"Time: hours: {(end-srart)//3600} minutes: {((end-srart)%3600)//60} seconds: {(end-srart)%60}") + + + if rank == 0: + pairs_to_compute = pairs_to_compute[:n_computed_pairs,:] + dd_of_pairs = dd_of_pairs[:n_computed_pairs] + dh_of_pairs = dh_of_pairs[:n_computed_pairs] + dv_of_pairs = dv_of_pairs[:n_computed_pairs] + zrec_of_pairs = zrec_of_pairs[:n_computed_pairs] + zsrc_of_pairs = zsrc_of_pairs[:n_computed_pairs] + + print(f"Need only {n_computed_pairs} pairs of {npairs} ({n_computed_pairs/npairs*100}% reduction)") + + if store_here is not None: + import h5py + with h5py.File(store_here + '.h5', 'w') as hf: + # hf.create_dataset("dists", data=dists) + hf.create_dataset("pairs_to_compute", data=pairs_to_compute) + hf.create_dataset("dd_of_pairs", data=dd_of_pairs) + hf.create_dataset("dh_of_pairs", data=dh_of_pairs) + hf.create_dataset("dv_of_pairs", data=dv_of_pairs) + hf.create_dataset("zrec_of_pairs", data=zrec_of_pairs) + hf.create_dataset("zsrc_of_pairs", data=zsrc_of_pairs) + hf.create_dataset("delta_h", data=delta_h) + hf.create_dataset("delta_v_rec", data=delta_v_rec) + hf.create_dataset("delta_v_src", data=delta_v_src) # return dists, pairs_to_compute, dh_of_pairs, dv_of_pairs, zrec_of_pairs, zrec_of_pairs From ec886d3077be667db8450a3e73cb0c664baf9523 Mon Sep 17 00:00:00 2001 From: amnp95 Date: Wed, 11 Sep 2024 03:22:49 -0700 Subject: [PATCH 02/17] modifying the parallel compute green function pairs --- shakermaker/shakermaker.py | 151 ++++++++++++++++++++----------------- 1 file changed, 82 insertions(+), 69 deletions(-) diff --git a/shakermaker/shakermaker.py b/shakermaker/shakermaker.py index 9574594..c12b813 100644 --- a/shakermaker/shakermaker.py +++ b/shakermaker/shakermaker.py @@ -1809,6 +1809,7 @@ def lor(a, b, c): # wait for all ranks to reach here comm.Barrier() + if nprocs ==1 : # now check 
that the founded pairs cover all the points notcoverd = [] @@ -1823,8 +1824,6 @@ def lor(a, b, c): print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") print(f"\t\tnumber of pairs notcovered yet: {len(notcoverd)}") else: - - if rank == 0: # On rank 0, calculate or set the value of n_computed_pairs n_computed_pairs = np.array(n_computed_pairs, dtype='i') # Example value @@ -1842,65 +1841,69 @@ def lor(a, b, c): comm.Barrier() - - if rank > 0: - dh_of_pairs = np.empty(n_computed_pairs, dtype=np.double) - zrec_of_pairs = np.empty(n_computed_pairs, dtype=np.double) - zsrc_of_pairs = np.empty(n_computed_pairs, dtype=np.double) - - comm.Barrier() - # send dh_of_pairs, dv_of_pairs, zrec_of_pairs, zsrc_of_pairs to all ranks - if rank == 0: - for i in range(1, nprocs): - comm.Send(dh_of_pairs[:n_computed_pairs], dest=i, tag=5) - comm.Send(zrec_of_pairs[:n_computed_pairs], dest=i, tag=6) - comm.Send(zsrc_of_pairs[:n_computed_pairs], dest=i, tag=7) - else: - comm.Recv(dh_of_pairs, source=0, tag=5) - comm.Recv(zrec_of_pairs, source=0, tag=6) - comm.Recv(zsrc_of_pairs, source=0, tag=7) - comm.Barrier() - # now found uncovered points for each rank - notcoverd = [] - if rank == 0: - # measure the percentage of the cheking - print(f"\t\tchecking the uncovered points") - for i in tqdm(range(iend)): - if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ - np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ - np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): - notcoverd.append(i) - - else: - for i in range(len(mini_dh)): - if np.all(lor(np.abs(mini_dh[i] - dh_of_pairs) > delta_h, \ - np.abs(mini_z_src[i] - zsrc_of_pairs) > delta_v_src, \ - np.abs(mini_z_rec[i] - zrec_of_pairs) > delta_v_rec)): - notcoverd.append(i) + def get_not_covered_points(rank, nprocs): + if rank > 0: + dh_of_pairs = np.empty(n_computed_pairs, dtype=np.double) + zrec_of_pairs = np.empty(n_computed_pairs, dtype=np.double) + zsrc_of_pairs = 
np.empty(n_computed_pairs, dtype=np.double) + + comm.Barrier() + # send dh_of_pairs, dv_of_pairs, zrec_of_pairs, zsrc_of_pairs to all ranks + if rank == 0: + for i in range(1, nprocs): + comm.Send(dh_of_pairs[:n_computed_pairs], dest=i, tag=5) + comm.Send(zrec_of_pairs[:n_computed_pairs], dest=i, tag=6) + comm.Send(zsrc_of_pairs[:n_computed_pairs], dest=i, tag=7) + else: + comm.Recv(dh_of_pairs, source=0, tag=5) + comm.Recv(zrec_of_pairs, source=0, tag=6) + comm.Recv(zsrc_of_pairs, source=0, tag=7) + comm.Barrier() + # now found uncovered points for each rank + notcoverd = [] + if rank == 0: + # measure the percentage of the cheking + print(f"\t\tchecking the uncovered points") + for i in tqdm(range(iend)): + if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): + notcoverd.append(i) + + else: + for i in range(len(mini_dh)): + if np.all(lor(np.abs(mini_dh[i] - dh_of_pairs) > delta_h, \ + np.abs(mini_z_src[i] - zsrc_of_pairs) > delta_v_src, \ + np.abs(mini_z_rec[i] - zrec_of_pairs) > delta_v_rec)): + notcoverd.append(i) + + # if rank == 0: + # print(f"{rank=} {notcoverd=}") + notcoverd = np.array(notcoverd) + istart - # if rank == 0: - # print(f"{rank=} {notcoverd=}") - notcoverd = np.array(notcoverd) + istart - - comm.Barrier() - # print(f"{rank=} {len(notcoverd)=}") - # Gather the sizes of the notcoverd arrays from all processes - notcoverd_sizes = comm.gather(len(notcoverd), root=0) + comm.Barrier() + # print(f"{rank=} {len(notcoverd)=}") + # Gather the sizes of the notcoverd arrays from all processes + notcoverd_sizes = comm.gather(len(notcoverd), root=0) - if rank == 0: - # Calculate the total size and create a buffer to hold all data - total_size = sum(notcoverd_sizes) - all_notcoverd = np.empty(total_size, dtype=int) - else: - all_notcoverd = None + if rank == 0: + # Calculate the total size and 
create a buffer to hold all data + total_size = sum(notcoverd_sizes) + all_notcoverd = np.empty(total_size, dtype=int) + else: + all_notcoverd = None + + # Gather all notcoverd arrays to the root process + comm.Gatherv(sendbuf=notcoverd, recvbuf=(all_notcoverd, notcoverd_sizes), root=0) + notcoverd = all_notcoverd + if rank == 0: + print(f"\t\tnumber of pairs founded: {n_computed_pairs}") + print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") + print(f"\t\tnumber of pairs notcovered yet: {len(all_notcoverd)}") + return notcoverd + - # Gather all notcoverd arrays to the root process - comm.Gatherv(sendbuf=notcoverd, recvbuf=(all_notcoverd, notcoverd_sizes), root=0) - notcoverd = all_notcoverd - if rank == 0: - print(f"\t\tnumber of pairs founded: {n_computed_pairs}") - print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") - print(f"\t\tnumber of pairs notcovered yet: {len(all_notcoverd)}") + notcoverd = get_not_covered_points(rank, nprocs) @@ -1956,19 +1959,29 @@ def lor(a, b, c): # print(f"\t\t\tOn {j=} of {len(indices)} {n_computed_pairs} ({n_computed_pairs/npairs*100}% reduction)") notcoverd = [] - print(f"\t\tchecking the uncovered points") - for j in tqdm(range(len(indexes))): - i = indexes[j] - if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ - np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ - np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): - notcoverd.append(i) - print(f"\t\tnumber of pairs founded: {n_computed_pairs}") - print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") + + # print(f"\t\tchecking the uncovered points") + # for j in tqdm(range(len(indexes))): + # i = indexes[j] + # if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + # np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + # np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): + # notcoverd.append(i) + # print(f"\t\tnumber 
of pairs founded: {n_computed_pairs}") + # print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") + notcoverd = get_not_covered_points(rank, nprocs) iteration += 1 - + # final check for the not covered points + if nprocs > 1: + notcoverd = get_not_covered_points(rank, nprocs) + if rank == 0: + print("final check") + print(f"\t\tnumber of pairs founded: {n_computed_pairs}") + print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") + + print("Optimization done") print(f"Number of pairs founded: {n_computed_pairs}") print(f"Number of pairs skipped: {npairs_max - n_computed_pairs}") @@ -1996,7 +2009,7 @@ def lor(a, b, c): comm.Barrier() - if using_vectorize_manner==True and longer_version==True: + if (using_vectorize_manner==True) and (longer_version==True): print("Using vectorize manner and longer version") if rank == 0: # make the factor 0.99 to avoid the error in the digitize function From 5c6067298db59973992cf456dd833357520f6965 Mon Sep 17 00:00:00 2001 From: amnp95 Date: Wed, 11 Sep 2024 03:49:42 -0700 Subject: [PATCH 03/17] modifying the parallel compute green function pairs 2 --- shakermaker/shakermaker.py | 146 +++++++++++++++++-------------------- 1 file changed, 68 insertions(+), 78 deletions(-) diff --git a/shakermaker/shakermaker.py b/shakermaker/shakermaker.py index c12b813..20f7d34 100644 --- a/shakermaker/shakermaker.py +++ b/shakermaker/shakermaker.py @@ -1841,69 +1841,65 @@ def lor(a, b, c): comm.Barrier() - def get_not_covered_points(rank, nprocs): - if rank > 0: - dh_of_pairs = np.empty(n_computed_pairs, dtype=np.double) - zrec_of_pairs = np.empty(n_computed_pairs, dtype=np.double) - zsrc_of_pairs = np.empty(n_computed_pairs, dtype=np.double) - - comm.Barrier() - # send dh_of_pairs, dv_of_pairs, zrec_of_pairs, zsrc_of_pairs to all ranks - if rank == 0: - for i in range(1, nprocs): - comm.Send(dh_of_pairs[:n_computed_pairs], dest=i, tag=5) - comm.Send(zrec_of_pairs[:n_computed_pairs], dest=i, tag=6) - 
comm.Send(zsrc_of_pairs[:n_computed_pairs], dest=i, tag=7) - else: - comm.Recv(dh_of_pairs, source=0, tag=5) - comm.Recv(zrec_of_pairs, source=0, tag=6) - comm.Recv(zsrc_of_pairs, source=0, tag=7) - comm.Barrier() - # now found uncovered points for each rank - notcoverd = [] - if rank == 0: - # measure the percentage of the cheking - print(f"\t\tchecking the uncovered points") - for i in tqdm(range(iend)): - if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ - np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ - np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): - notcoverd.append(i) - - else: - for i in range(len(mini_dh)): - if np.all(lor(np.abs(mini_dh[i] - dh_of_pairs) > delta_h, \ - np.abs(mini_z_src[i] - zsrc_of_pairs) > delta_v_src, \ - np.abs(mini_z_rec[i] - zrec_of_pairs) > delta_v_rec)): - notcoverd.append(i) - - # if rank == 0: - # print(f"{rank=} {notcoverd=}") - notcoverd = np.array(notcoverd) + istart - - comm.Barrier() - # print(f"{rank=} {len(notcoverd)=}") - # Gather the sizes of the notcoverd arrays from all processes - notcoverd_sizes = comm.gather(len(notcoverd), root=0) - if rank == 0: - # Calculate the total size and create a buffer to hold all data - total_size = sum(notcoverd_sizes) - all_notcoverd = np.empty(total_size, dtype=int) - else: - all_notcoverd = None - - # Gather all notcoverd arrays to the root process - comm.Gatherv(sendbuf=notcoverd, recvbuf=(all_notcoverd, notcoverd_sizes), root=0) - notcoverd = all_notcoverd - if rank == 0: - print(f"\t\tnumber of pairs founded: {n_computed_pairs}") - print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") - print(f"\t\tnumber of pairs notcovered yet: {len(all_notcoverd)}") - return notcoverd + if rank > 0: + dh_of_pairs = np.empty(n_computed_pairs, dtype=np.double) + zrec_of_pairs = np.empty(n_computed_pairs, dtype=np.double) + zsrc_of_pairs = np.empty(n_computed_pairs, dtype=np.double) + comm.Barrier() + # send 
dh_of_pairs, dv_of_pairs, zrec_of_pairs, zsrc_of_pairs to all ranks + if rank == 0: + for i in range(1, nprocs): + comm.Send(dh_of_pairs[:n_computed_pairs], dest=i, tag=5) + comm.Send(zrec_of_pairs[:n_computed_pairs], dest=i, tag=6) + comm.Send(zsrc_of_pairs[:n_computed_pairs], dest=i, tag=7) + else: + comm.Recv(dh_of_pairs, source=0, tag=5) + comm.Recv(zrec_of_pairs, source=0, tag=6) + comm.Recv(zsrc_of_pairs, source=0, tag=7) + comm.Barrier() + # now found uncovered points for each rank + notcoverd = [] + if rank == 0: + # measure the percentage of the cheking + print(f"\t\tchecking the uncovered points") + for i in tqdm(range(iend)): + if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): + notcoverd.append(i) + + else: + for i in range(len(mini_dh)): + if np.all(lor(np.abs(mini_dh[i] - dh_of_pairs) > delta_h, \ + np.abs(mini_z_src[i] - zsrc_of_pairs) > delta_v_src, \ + np.abs(mini_z_rec[i] - zrec_of_pairs) > delta_v_rec)): + notcoverd.append(i) + + # if rank == 0: + # print(f"{rank=} {notcoverd=}") + notcoverd = np.array(notcoverd) + istart + + comm.Barrier() + # print(f"{rank=} {len(notcoverd)=}") + # Gather the sizes of the notcoverd arrays from all processes + notcoverd_sizes = comm.gather(len(notcoverd), root=0) + + if rank == 0: + # Calculate the total size and create a buffer to hold all data + total_size = sum(notcoverd_sizes) + all_notcoverd = np.empty(total_size, dtype=int) + else: + all_notcoverd = None - notcoverd = get_not_covered_points(rank, nprocs) + # Gather all notcoverd arrays to the root process + comm.Gatherv(sendbuf=notcoverd, recvbuf=(all_notcoverd, notcoverd_sizes), root=0) + notcoverd = all_notcoverd + if rank == 0: + print(f"\t\tnumber of pairs founded: {n_computed_pairs}") + print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") + print(f"\t\tnumber of pairs 
notcovered yet: {len(all_notcoverd)}") @@ -1959,27 +1955,21 @@ def get_not_covered_points(rank, nprocs): # print(f"\t\t\tOn {j=} of {len(indices)} {n_computed_pairs} ({n_computed_pairs/npairs*100}% reduction)") notcoverd = [] - - # print(f"\t\tchecking the uncovered points") - # for j in tqdm(range(len(indexes))): - # i = indexes[j] - # if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ - # np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ - # np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): - # notcoverd.append(i) - # print(f"\t\tnumber of pairs founded: {n_computed_pairs}") - # print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") - notcoverd = get_not_covered_points(rank, nprocs) + print(f"\t\tchecking the uncovered points") + for j in tqdm(range(len(indexes))): + i = indexes[j] + if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): + notcoverd.append(i) + print(f"\t\tnumber of pairs founded: {n_computed_pairs}") + print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") iteration += 1 - # final check for the not covered points - if nprocs > 1: - notcoverd = get_not_covered_points(rank, nprocs) - if rank == 0: - print("final check") - print(f"\t\tnumber of pairs founded: {n_computed_pairs}") - print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") + # final checking + # for j in tqdm(range(len(dh))): + print("Optimization done") From 44c10af9ef6355ef3ce73d4f5060167609f3fafd Mon Sep 17 00:00:00 2001 From: Amin Pakzad Date: Wed, 11 Sep 2024 19:41:57 -0700 Subject: [PATCH 04/17] modifying the parallel compute green function pairs 3 --- shakermaker/shakermaker.py | 271 ++++++++++++++++++------------------- 1 file changed, 133 insertions(+), 138 deletions(-) diff --git a/shakermaker/shakermaker.py 
b/shakermaker/shakermaker.py index 20f7d34..ededa9d 100644 --- a/shakermaker/shakermaker.py +++ b/shakermaker/shakermaker.py @@ -913,6 +913,7 @@ def printMPI(*args): ipair_target = best_match_index else: print(f"No suitable match found! {allow_out_of_bounds=} {min_distance=}") + print(f"{rank=} {i_station=} {i_psource=}") if ipair_target == len(dh_of_pairs): print("Target not found in database -- SKIPPING") @@ -1564,7 +1565,7 @@ def gen_greens_function_database_pairs(self, npairs_max=100000, using_vectorize_manner=False, cfactor = 0.5, - longer_version=True + finalcheck=True, ): """Run the simulation. @@ -1684,7 +1685,6 @@ def lor(a, b, c): dh = np.linalg.norm(receiver_matrix[:, np.newaxis, :] - source_matrix[np.newaxis, :, :], axis=2) dh = dh.flatten() - # compute the vertical distance matrix print ("Computing the vertical distance matrix") z_rec, z_src = np.meshgrid(z_rec, z_src, indexing='ij') @@ -1711,28 +1711,31 @@ def lor(a, b, c): else: comm.Recv(mini_dh, source=0, tag=1) comm.Recv(mini_z_rec, source=0, tag=2) - comm.Recv(mini_z_src, source=0, tag=3) - elif nprocs == 1: - print("running on one processor") - istart = 0 - iend = toatalnumberofpairs + comm.Recv(mini_z_src, source=0, tag=3) comm.Barrier() - if using_vectorize_manner==True and longer_version==False: + if using_vectorize_manner==True: if rank == 0: # Creating bins print ("Computing the bins") eps = 1e-8 dh_max , dh_min = np.max(dh), np.min(dh) - dh_bins = np.arange(dh_min, dh_max + eps, delta_h*factor) + binsnumber = int((dh_max - dh_min) // (delta_h*factor) + 1) + dh_bins = np.linspace(dh_min, dh_max + eps, binsnumber+1) + assert np.all(np.diff(dh_bins) <= delta_h*factor) + dzrec_max , dzrec_min = np.max(z_rec), np.min(z_rec) - dzrec_bins = np.arange(dzrec_min, dzrec_max + eps, delta_v_rec*factor) + binsnumber = int((dzrec_max - dzrec_min) // (delta_v_rec*factor) + 1) + dzrec_bins = np.linspace(dzrec_min, dzrec_max + eps, binsnumber+1) + assert np.all(np.diff(dzrec_bins) <= delta_v_rec*factor) 
dzsrc_max , dzsrc_min = np.max(z_src), np.min(z_src) - dzsrc_bins = np.arange(dzsrc_min, dzsrc_max + eps, delta_v_src*factor) + binsnumber = int((dzsrc_max - dzsrc_min) // (delta_v_src*factor) + 1) + dzsrc_bins = np.linspace(dzsrc_min, dzsrc_max + eps, binsnumber+1) + assert np.all(np.diff(dzsrc_bins) <= delta_v_src*factor) # computing total number of bins num_bins = len(dh_bins) * len(dzrec_bins) * len(dzsrc_bins) @@ -1803,17 +1806,17 @@ def lor(a, b, c): n_computed_pairs += 1 - + print(f"\t\tnumber of pairs founded: {n_computed_pairs}") # if j % 10000 == 0 or j == len(indices) - 1: # print(f"\t\t\tOn {j=} of {npairs_max} {n_computed_pairs} ({n_computed_pairs/npairs*100}% reduction)") # wait for all ranks to reach here comm.Barrier() - if nprocs ==1 : + print("using single processor:") # now check that the founded pairs cover all the points notcoverd = [] - for i in range(len(dh)): + for i in tqdm(range(len(dh))): if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): @@ -1823,7 +1826,11 @@ def lor(a, b, c): print(f"\t\tnumber of pairs founded: {n_computed_pairs}") print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") print(f"\t\tnumber of pairs notcovered yet: {len(notcoverd)}") + # print notcoverd in a file + np.savetxt("notcoverd_single.txt", notcoverd, fmt='%d') else: + + if rank == 0: # On rank 0, calculate or set the value of n_computed_pairs n_computed_pairs = np.array(n_computed_pairs, dtype='i') # Example value @@ -1900,6 +1907,8 @@ def lor(a, b, c): print(f"\t\tnumber of pairs founded: {n_computed_pairs}") print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") print(f"\t\tnumber of pairs notcovered yet: {len(all_notcoverd)}") + # print notcoverd in a file + np.savetxt("notcoverd_multi.txt", notcoverd, fmt='%d') @@ -1920,6 +1929,24 @@ def lor(a, b, c): # digitizing the 
data + eps = 1e-8 + factor = 0.99 + dh_max , dh_min = np.max(dh), np.min(dh) + binsnumber = int((dh_max - dh_min) // (delta_h*factor) + 1) + dh_bins = np.linspace(dh_min-eps, dh_max + eps, binsnumber+1) + assert np.all(np.diff(dh_bins) <= delta_h*factor) + + + dzrec_max , dzrec_min = np.max(z_rec), np.min(z_rec) + binsnumber = int((dzrec_max - dzrec_min) // (delta_v_rec*factor) + 1) + dzrec_bins = np.linspace(dzrec_min-eps, dzrec_max + eps, binsnumber+1) + assert np.all(np.diff(dzrec_bins) <= delta_v_rec*factor) + + dzsrc_max , dzsrc_min = np.max(z_src), np.min(z_src) + binsnumber = int((dzsrc_max - dzsrc_min) // (delta_v_src*factor) + 1) + dzsrc_bins = np.linspace(dzsrc_min-eps, dzsrc_max + eps, binsnumber+1) + assert np.all(np.diff(dzsrc_bins) <= delta_v_src*factor) + dh_digitized = np.digitize(dh[notcoverd], dh_bins) z_rec_digitized = np.digitize(z_rec[notcoverd], dzrec_bins) z_src_digitized = np.digitize(z_src[notcoverd], dzsrc_bins) @@ -1933,15 +1960,17 @@ def lor(a, b, c): indices = np.sort(indices) indices = indexes[indices] + - print(f"\t\treducing the number of pairs:") + # print(f"\t\treducing the number of pairs:") for j in tqdm(range(len(indices))): i = indices[j] - condition = lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ - np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ - np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec) - if np.all(condition): + # condition = lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + # np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + # np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec) + # if np.all(condition): + if True: istation = i // len(self._source._pslist) ipsource = i % len(self._source._pslist) pairs_to_compute[n_computed_pairs,:] = [istation, ipsource] @@ -1955,23 +1984,33 @@ def lor(a, b, c): # print(f"\t\t\tOn {j=} of {len(indices)} {n_computed_pairs} ({n_computed_pairs/npairs*100}% reduction)") notcoverd 
= [] - print(f"\t\tchecking the uncovered points") - for j in tqdm(range(len(indexes))): - i = indexes[j] - if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ - np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ - np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): - notcoverd.append(i) - print(f"\t\tnumber of pairs founded: {n_computed_pairs}") - print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") + # print(f"\t\tchecking the uncovered points") + # for j in tqdm(range(len(indexes))): + # i = indexes[j] + # if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + # np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + # np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): + # notcoverd.append(i) + # print(f"\t\tnumber of pairs founded: {n_computed_pairs}") + # print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") iteration += 1 + # if finalcheck: + # if rank == 0: + # print("final check") + # # checking the not covered points + # notcoverd = [] + # for i in range(len(dh)): + # if np.all(lor(np.abs(dh[i] - dh_of_pairs[:n_computed_pairs]) > delta_h, \ + # np.abs(z_src[i] - zsrc_of_pairs[:n_computed_pairs]) > delta_v_src, \ + # np.abs(z_rec[i] - zrec_of_pairs[:n_computed_pairs]) > delta_v_rec)): + # notcoverd.append(i) + # if i % (len(dh)//10) == 0: + # print(f"{((i+1)//len(dh)*100)}%") + # print(f"\t\tnumber of pairs founded: {n_computed_pairs}") + # print(f"\t\tnumber of points not covered yet: {len(notcoverd)}") + # print timin - # final checking - # for j in tqdm(range(len(dh))): - - - print("Optimization done") print(f"Number of pairs founded: {n_computed_pairs}") print(f"Number of pairs skipped: {npairs_max - n_computed_pairs}") @@ -1999,111 +2038,6 @@ def lor(a, b, c): comm.Barrier() - if (using_vectorize_manner==True) and (longer_version==True): - print("Using vectorize manner and longer version") - if rank == 0: - # make the 
factor 0.99 to avoid the error in the digitize function - factor = 0.98 - # Creating bins - print ("Computing the bins") - eps = 1e-8 - dh_max , dh_min = np.max(dh), np.min(dh) - # dh_bins = np.arange(dh_min, dh_max + eps, delta_h*factor) - num = (dh_max - dh_min) / (delta_h*factor) - # celing the number of bins - num = np.ceil(num) - num = int(num) - # bins should include the maximum value - dh_bins = np.linspace(dh_min-eps, dh_max+eps, num+1) - - - dzrec_max , dzrec_min = np.max(z_rec), np.min(z_rec) - num = (dzrec_max - dzrec_min) / (delta_v_rec*factor) - # celing the number of bins - num = np.ceil(num) - num = int(num) - # bins should include the maximum value - dzrec_bins = np.linspace(dzrec_min-eps, dzrec_max+eps, num+1) - - - dzsrc_max , dzsrc_min = np.max(z_src), np.min(z_src) - num = (dzsrc_max - dzsrc_min) / (delta_v_src*factor) - # celing the number of bins - num = np.ceil(num) - num = int(num) - # bins should include the maximum value - dzsrc_bins = np.linspace(dzsrc_min-eps, dzsrc_max+eps, num+1) - - - # verify that the bins differ by delta_h, delta_v_rec and delta_v_src - assert np.all(np.diff(dh_bins) - delta_h < 1e-8) - assert np.all(np.diff(dzrec_bins) - delta_v_rec < 1e-8) - assert np.all(np.diff(dzsrc_bins) - delta_v_src < 1e-8) - - - # computing total number of bins - num_bins = len(dh_bins) * len(dzrec_bins) * len(dzsrc_bins) - print(f"Total number of bins: {num_bins}") - - # digitizing the data - print ("Digitizing the data") - dh_digitized = np.digitize(dh, dh_bins) - z_rec_digitized = np.digitize(z_rec, dzrec_bins) - z_src_digitized = np.digitize(z_src, dzsrc_bins) - - - - - # creat matrix of pairs - print ("Creating the pairs") - dh_digitized -= 1 - z_rec_digitized -= 1 - z_src_digitized -= 1 - # pairs_to_compute =[categories[dh_digitized[i], z_rec_digitized[i], z_src_digitized[i]] for i in range(npairs)] - pairs_to_compute = dh_digitized * len(dzrec_bins) * len(dzsrc_bins) + z_rec_digitized * len(dzsrc_bins) + z_src_digitized - 
pairs_to_compute, indices,reverseIndicies = np.unique(pairs_to_compute, return_index=True,return_inverse=True) - indices = np.sort(indices) - i_station = indices // len(self._source._pslist) - i_psource = indices % len(self._source._pslist) - - - - - - - - npairs_max = len(pairs_to_compute) - pairs_to_compute = np.empty((npairs_max, 2), dtype=np.int32) - dd_of_pairs = np.empty(npairs_max, dtype=np.double) - dh_of_pairs = np.empty(npairs_max, dtype=np.double) - dv_of_pairs = np.empty(npairs_max, dtype=np.double) - zrec_of_pairs = np.empty(npairs_max, dtype=np.double) - zsrc_of_pairs = np.empty(npairs_max, dtype=np.double) - - print(f"npairs_max = {npairs_max}") - # ==================================================================================================== - # optimizing the number of pairs - # ==================================================================================================== - - - - n_computed_pairs = 0 - print ("Optimizing the number of pairs") - print("\titeration 1") - print("\t\treducing the number of pairs:") - - # reducing the number of pairs - - for j in tqdm(range(len(indices))): - i = indices[j] - pairs_to_compute[n_computed_pairs,:] = [i_station[j], i_psource[j]] - dd_of_pairs[n_computed_pairs] = dd[i] - dh_of_pairs[n_computed_pairs] = dh[i] - dv_of_pairs[n_computed_pairs] = dv[i] - zrec_of_pairs[n_computed_pairs] = z_rec[i] - zsrc_of_pairs[n_computed_pairs] = z_src[i] - n_computed_pairs += 1 - if using_vectorize_manner==False: srart = perf_counter() @@ -2219,6 +2153,67 @@ def lor(a, b, c): # return dists, pairs_to_compute, dh_of_pairs, dv_of_pairs, zrec_of_pairs, zrec_of_pairs return + + + def check_greens_function_database_pairs(self, h5_database_name, delta_h=0.04, delta_v_rec=0.002, delta_v_src=0.2): + import h5py + if rank == 0: + hfile = h5py.File(h5_database_name + '.h5', 'r+') + + pairs_to_compute = hfile["/pairs_to_compute"][:] + dh_of_pairs = hfile["/dh_of_pairs"][:] + zrec_of_pairs = hfile["/zrec_of_pairs"][:] + 
zsrc_of_pairs = hfile["/zsrc_of_pairs"][:] + f = open("check.txt", "w") + bar = tqdm(total=self._receivers.nstations) + bar2 = tqdm(total=self._source.nsources) + for i_station, station in enumerate(self._receivers): + bar.update(1) + bar2.reset() + for i_psource, psource in enumerate(self._source): + bar2.update(1) + + x_src = psource.x + x_rec = station.x + + z_src = psource.x[2] + z_rec = station.x[2] + + d = x_rec - x_src + dh = np.sqrt(np.dot(d[0:2],d[0:2])) + + min_distance = float('inf') + best_match_index = -1 + + for i in range(len(dh_of_pairs)): + dh_p, zrec_p, zsrc_p = dh_of_pairs[i], zrec_of_pairs[i], zsrc_of_pairs[i] + + # Check if the current pair is within the tolerances + if (abs(dh - dh_p) < delta_h and abs(z_src - zsrc_p) < delta_v_src and abs(z_rec - zrec_p) < delta_v_rec): + best_match_index = i + + if best_match_index == -1: + # print(f"No suitable match found! {min_distance=}") + # print(f"{rank=} {i_station=} {i_psource=}") + # write on the file + f.write(f"{rank=} {i_station=} {i_psource=}\n") + if i_psource % 1000 == 0: + # flush the file + f.flush() + f.close() + bar.close() + bar2.close() + + + + + + + + + + + def write(self, writer): writer.write(self._receivers) From 187df6af463109bc1cb99034b547799af91597d9 Mon Sep 17 00:00:00 2001 From: amnp95 Date: Sun, 15 Sep 2024 13:28:04 -0700 Subject: [PATCH 05/17] modifying the parallel compute green function pairs 4 --- shakermaker/shakermaker.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/shakermaker/shakermaker.py b/shakermaker/shakermaker.py index ededa9d..2dde715 100644 --- a/shakermaker/shakermaker.py +++ b/shakermaker/shakermaker.py @@ -1646,8 +1646,8 @@ def lor(a, b, c): for i in range(nprocs): istartList.append(i * toatalnumberofpairs // nprocs) iendList.append((i + 1) * toatalnumberofpairs // nprocs) - print(f"{istartList=}") - print(f"{iendList=}") + # print(f"{istartList=}") + # print(f"{iendList=}") @@ -1826,8 +1826,8 @@ def lor(a, b, c): 
print(f"\t\tnumber of pairs founded: {n_computed_pairs}") print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") print(f"\t\tnumber of pairs notcovered yet: {len(notcoverd)}") - # print notcoverd in a file - np.savetxt("notcoverd_single.txt", notcoverd, fmt='%d') + # # print notcoverd in a file + # np.savetxt("notcoverd_single.txt", notcoverd, fmt='%d') else: @@ -1907,8 +1907,8 @@ def lor(a, b, c): print(f"\t\tnumber of pairs founded: {n_computed_pairs}") print(f"\t\tnumber of pairs skipped: {npairs_max - n_computed_pairs}") print(f"\t\tnumber of pairs notcovered yet: {len(all_notcoverd)}") - # print notcoverd in a file - np.savetxt("notcoverd_multi.txt", notcoverd, fmt='%d') + # # print notcoverd in a file + # np.savetxt("notcoverd_multi.txt", notcoverd, fmt='%d') From ccae819b5a5b780eda3ace201fa94d7a73055845 Mon Sep 17 00:00:00 2001 From: Amin Pakzad Date: Mon, 28 Oct 2024 03:10:43 -0700 Subject: [PATCH 06/17] Create IntelSetup.py This file contains the flags for compiling ShakerMaker with intel compilers. --- IntelSetup.py | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 IntelSetup.py diff --git a/IntelSetup.py b/IntelSetup.py new file mode 100644 index 0000000..49fc8cf --- /dev/null +++ b/IntelSetup.py @@ -0,0 +1,154 @@ +from numpy.distutils.core import Extension, setup + + +name = "shakermaker" +version = "0.1" +release = "0.01" +author = "Jose A. 
Abell, Jorge Crempien D., and Matias Recabarren" + +srcdir = "shakermaker/core/" +srcs = [ + srcdir + 'core.pyf', + srcdir + "subgreen.f", + srcdir + "subgreen2.f", + srcdir + "subfocal.f", + srcdir + "subfk.f", + srcdir + "subtrav.f", + srcdir + "tau_p.f", + srcdir + "kernel.f", + srcdir + "prop.f", + srcdir + "source.f", + srcdir + "bessel.f", + srcdir + "haskell.f", + srcdir + "fft.c", + srcdir + "Complex.c" +] + + +# ext_modules = [ +# Extension ( +# name='shakermaker.core', +# sources=srcs, +# extra_f77_compile_args=["-ffixed-line-length-132", "-Wno-tabs", "-Wno-unused-dummy-argument", "-fPIC"], +# extra_compile_args=["-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION"] +# ) +# ] + + +ext_modules = [ + Extension ( + name='shakermaker.core', + sources=srcs, + extra_f77_compile_args=["-132", + "-w", + "-fPIC", + "-O3", + "-xHost", + "-qopenmp", + "-ipo" + ], + extra_compile_args=[ + "-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION", + "-xHost", # Optimize for the host machine's processor + "-O3", # Aggressive optimizations + "-qopenmp" # If you are using OpenMP, enables parallelization + ] + ) + ] + + + +''' + +ext_modules = [ + Extension( + name='shakermaker.core', + sources=srcs, + extra_f77_compile_args=[ + "-O1", # Safe optimization level: balances performance and stability + "-xHost", # Optimizes for the current machine's processor + "-qopenmp", # Enables parallelization using OpenMP (if the code supports it) + "-ipo", # Interprocedural optimization across multiple files for better performance + "-fPIC" # Ensures position-independent code for shared libraries + ], + extra_compile_args=[ + "-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION", # Ensures compatibility with older NumPy APIs + "-xHost", # Processor-specific optimizations for maximum performance + "-O1", # Safe optimization level + "-qopenmp" # Enables parallelization + ] + ) +] + +''' + + + + + + + +# ext_modules = [ +# Extension ( +# name='shakermaker.core', +# sources=srcs, +# extra_f77_compile_args=[ +# 
"-extend-source", # Equivalent to -ffixed-line-length-132 +# "-w", # Suppresses all warnings, including tabs and unused dummy arguments +# "-fPIC" # Position-independent code, same as with GCC +# ], +# extra_compile_args=[ +# "-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION", +# # "-xHost", # Optimize for the host machine's processor +# # "-O3", # Aggressive optimizations +# # "-qopenmp" # If you are using OpenMP, enables parallelization +# ] +# ) +# ] + + + + +cmdclass = {} +command_options = {} + +setup( + name = name, + package_dir = {'shakermaker': 'shakermaker'}, + packages = [ + "shakermaker", + "shakermaker.cm_library", + "shakermaker.sl_extensions", + "shakermaker.slw_extensions", + "shakermaker.stf_extensions", + "shakermaker.tools", + ], + ext_modules = ext_modules, + version = version, + description = "README.md", + author = author, + author_email = "info@joseabell.com", + url = "http://www.joseabell.com", + download_url = "tbd", + keywords = ["earthquake", "engineering", "drm", "simulation"], + classifiers = [ + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Fortran", + "Development Status :: Released", + "Environment :: Other Environment", + "Intended Audience :: Savvy Earthquake Engineers", + "License :: GPL", + "Operating System :: OS Independent", + "Topic :: TBD", + "Topic :: TBD2", + ], + long_description = """\ + shakermaker + ------------------------------------- + Create realistic seismograms! 
+ + """, + cmdclass = cmdclass, + command_options = command_options, +) From 6e4ca71ee92820a1b5a89c6ea40403a781f5e5c8 Mon Sep 17 00:00:00 2001 From: Amin Pakzad Date: Mon, 28 Oct 2024 22:18:43 -0700 Subject: [PATCH 07/17] Update shakermaker.py to create DRM points coordinates for visualization --- shakermaker/shakermaker.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/shakermaker/shakermaker.py b/shakermaker/shakermaker.py index 2dde715..15df05c 100644 --- a/shakermaker/shakermaker.py +++ b/shakermaker/shakermaker.py @@ -1566,6 +1566,7 @@ def gen_greens_function_database_pairs(self, using_vectorize_manner=False, cfactor = 0.5, finalcheck=True, + StationType="DRM", ): """Run the simulation. @@ -1670,6 +1671,19 @@ def lor(a, b, c): for i_station, station in enumerate(self._receivers): receiver_matrix[i_station, :] = station.x + + # creating DRM stations in csv file + if StationType.lower() == "drm": + if not os.path.dirname(store_here): # If the path is just a filename + mother_directory = os.getcwd() # Get the current working directory + else: + mother_directory = os.path.dirname(os.path.abspath(store_here)) + + np.savetxt(mother_directory + "/DRMPoints.csv", source_matrix, delimiter=",", header="x,y,z", comments="") + + + + z_rec = receiver_matrix[:,2] z_src = source_matrix[:,2] From dd0c8959fbae3e62524d292518a9757dd7bbf61c Mon Sep 17 00:00:00 2001 From: Amin Pakzad Date: Mon, 28 Oct 2024 22:20:41 -0700 Subject: [PATCH 08/17] adding import os, for generate DRMPoints coordinates --- shakermaker/shakermaker.py | 1 + 1 file changed, 1 insertion(+) diff --git a/shakermaker/shakermaker.py b/shakermaker/shakermaker.py index 15df05c..ed761cf 100644 --- a/shakermaker/shakermaker.py +++ b/shakermaker/shakermaker.py @@ -1674,6 +1674,7 @@ def lor(a, b, c): # creating DRM stations in csv file if StationType.lower() == "drm": + import os if not os.path.dirname(store_here): # If the path is just a filename mother_directory = os.getcwd() # Get the current 
working directory else: From dea6194b0b0ff503591e145f072cac6031c15748 Mon Sep 17 00:00:00 2001 From: Amin Pakzad Date: Mon, 28 Oct 2024 23:10:15 -0700 Subject: [PATCH 09/17] replacing source matrix by receiver matrix --- shakermaker/shakermaker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shakermaker/shakermaker.py b/shakermaker/shakermaker.py index ed761cf..d1c0048 100644 --- a/shakermaker/shakermaker.py +++ b/shakermaker/shakermaker.py @@ -1680,7 +1680,7 @@ def lor(a, b, c): else: mother_directory = os.path.dirname(os.path.abspath(store_here)) - np.savetxt(mother_directory + "/DRMPoints.csv", source_matrix, delimiter=",", header="x,y,z", comments="") + np.savetxt(mother_directory + "/DRMPoints.csv", receiver_matrix, delimiter=",", header="x,y,z", comments="") From a29c8296484cef970a8d4b66c6ca8cc91e423edb Mon Sep 17 00:00:00 2001 From: amnp95 Date: Thu, 7 Nov 2024 14:25:46 -0800 Subject: [PATCH 10/17] adding windows compiling instructions --- IntelWindowsSetup.py | 99 ++++++++++++++++++++++++++++++++++++++++++++ windows.bat | 17 ++++++++ 2 files changed, 116 insertions(+) create mode 100644 IntelWindowsSetup.py create mode 100644 windows.bat diff --git a/IntelWindowsSetup.py b/IntelWindowsSetup.py new file mode 100644 index 0000000..0ed5762 --- /dev/null +++ b/IntelWindowsSetup.py @@ -0,0 +1,99 @@ +import os +from numpy.distutils.core import Extension, setup + +# Project information +name = "shakermaker" +version = "0.1" +release = "0.01" +author = "Jose A. 
Abell, Jorge Crempien D., and Matias Recabarren" + +# Source files +srcdir = "shakermaker/core/" +srcs = [ + srcdir + 'core.pyf', + srcdir + "subgreen.f", + srcdir + "subgreen2.f", + srcdir + "subfocal.f", + srcdir + "subfk.f", + srcdir + "subtrav.f", + srcdir + "tau_p.f", + srcdir + "kernel.f", + srcdir + "prop.f", + srcdir + "source.f", + srcdir + "bessel.f", + srcdir + "haskell.f", + srcdir + "fft.c", + srcdir + "Complex.c" +] + +# Set Intel compilers for C and Fortran +os.environ['FC'] = 'ifx' # Intel Fortran Compiler +os.environ['CC'] = 'icx' # Intel C Compiler +os.environ['CXX'] = 'icx' # Intel C++ Compiler +os.environ['F77'] = 'ifx' # Set Fortran 77 compiler + + + +# Compiler flags for Intel compilers +extra_f77_compile_args = [ + "/extend-source:132", # For Fortran line length compatibility + "/Qparallel", # Parallel compilation + "/O1", # Optimization level 3 + "/Qopenmp", # OpenMP support +] + +extra_compile_args = [ + "/DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION", + "/QxHost", # Optimize for host machine's processor + "/O1", # Aggressive optimization + "/Qopenmp", # Enable OpenMP for parallel processing +] + +# Define extension modules +ext_modules = [ + Extension( + name='shakermaker.core', + sources=srcs, + extra_f77_compile_args=extra_f77_compile_args, + extra_compile_args=extra_compile_args, + include_dirs=["D:/Programs/Intel/oneAPI/include"], # Adjust to your include path + library_dirs=["D:/Programs/Intel/oneAPI/lib"] # Adjust to your library path + ) +] + +# Define setup configuration +setup( + name=name, + package_dir={'shakermaker': 'shakermaker'}, + packages=[ + "shakermaker", + "shakermaker.cm_library", + "shakermaker.sl_extensions", + "shakermaker.slw_extensions", + "shakermaker.stf_extensions", + "shakermaker.tools", + ], + ext_modules=ext_modules, + version=version, + description="README.md", + author=author, + author_email="info@joseabell.com", + url="http://www.joseabell.com", + download_url="tbd", + keywords=["earthquake", "engineering", 
"drm", "simulation"], + classifiers=[ + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Fortran", + "Development Status :: 4 - Beta", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: GNU General Public License (GPL)", + "Operating System :: OS Independent", + "Topic :: Scientific/Engineering", + ], + long_description=""" + shakermaker + ------------------------------------- + Create realistic seismograms! + """, +) diff --git a/windows.bat b/windows.bat new file mode 100644 index 0000000..b9f1649 --- /dev/null +++ b/windows.bat @@ -0,0 +1,17 @@ +python -m venv myenv +call .\myenv\Scripts\activate +call pip install setuptools +call pip install numpy==1.23.5 +call pip install scipy==1.11.4 +call pip install matplotlib +call pip install geopy +call pip install pyproj +call pip install tqdm +call pip install h5py +call pip install mpi4py + +@REM replace the path with your own path to the Intel oneAPI installation +call "D:\Programs\Intel\oneAPI\setvars.bat" +python IntelWindowsSetup.py build +python IntelWindowsSetup.py install + From d23f071e4e30fccd38e912caa8af739dcd6a5f98 Mon Sep 17 00:00:00 2001 From: Amin Pakzad Date: Mon, 28 Apr 2025 11:34:56 -0700 Subject: [PATCH 11/17] Create Stampede3Setup.py --- Stampede3Setup.py | 88 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 Stampede3Setup.py diff --git a/Stampede3Setup.py b/Stampede3Setup.py new file mode 100644 index 0000000..ac46aac --- /dev/null +++ b/Stampede3Setup.py @@ -0,0 +1,88 @@ +from numpy.distutils.core import Extension, setup + + +name = "shakermaker" +version = "0.1" +release = "0.01" +author = "Jose A. 
Abell, Jorge Crempien D., and Matias Recabarren" + +srcdir = "shakermaker/core/" +srcs = [ + srcdir + 'core.pyf', + srcdir + "subgreen.f", + srcdir + "subgreen2.f", + srcdir + "subfocal.f", + srcdir + "subfk.f", + srcdir + "subtrav.f", + srcdir + "tau_p.f", + srcdir + "kernel.f", + srcdir + "prop.f", + srcdir + "source.f", + srcdir + "bessel.f", + srcdir + "haskell.f", + srcdir + "fft.c", + srcdir + "Complex.c" +] + +# Minimal flags for compatibility +ext_modules = [ + Extension( + name='shakermaker.core', + sources=srcs, + extra_f77_compile_args=[ + "-132", # Fixed format 132 character line length + "-w", # Disable warnings + "-fPIC", # Position Independent Code + "-O2", # Moderate optimization level + ], + extra_compile_args=[ + "-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION", + "-O2", # Moderate optimization level + "-fPIC" # Position Independent Code + ] + ) +] + +cmdclass = {} +command_options = {} + +setup( + name=name, + package_dir={'shakermaker': 'shakermaker'}, + packages=[ + "shakermaker", + "shakermaker.cm_library", + "shakermaker.sl_extensions", + "shakermaker.slw_extensions", + "shakermaker.stf_extensions", + "shakermaker.tools", + ], + ext_modules=ext_modules, + version=version, + description="README.md", + author=author, + author_email="info@joseabell.com", + url="http://www.joseabell.com", + download_url="tbd", + keywords=["earthquake", "engineering", "drm", "simulation"], + classifiers=[ + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Fortran", + "Development Status :: Released", + "Environment :: Other Environment", + "Intended Audience :: Savvy Earthquake Engineers", + "License :: GPL", + "Operating System :: OS Independent", + "Topic :: TBD", + "Topic :: TBD2", + ], + long_description="""\ + shakermaker + ------------------------------------- + Create realistic seismograms! 
+ + """, + cmdclass=cmdclass, + command_options=command_options, +) From 854dc82da5fdcd9dad7a9fbbd636b409acf8f0a1 Mon Sep 17 00:00:00 2001 From: amnp95 Date: Mon, 28 Apr 2025 11:50:19 -0700 Subject: [PATCH 12/17] organizing setup files for different systems --- IntelSetup.py => tacc/frontera.py | 0 Stampede3Setup.py => tacc/stampede3.py | 0 windows.bat => tacc/windows.bat | 0 IntelWindowsSetup.py => tacc/windows.py | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename IntelSetup.py => tacc/frontera.py (100%) rename Stampede3Setup.py => tacc/stampede3.py (100%) rename windows.bat => tacc/windows.bat (100%) rename IntelWindowsSetup.py => tacc/windows.py (100%) diff --git a/IntelSetup.py b/tacc/frontera.py similarity index 100% rename from IntelSetup.py rename to tacc/frontera.py diff --git a/Stampede3Setup.py b/tacc/stampede3.py similarity index 100% rename from Stampede3Setup.py rename to tacc/stampede3.py diff --git a/windows.bat b/tacc/windows.bat similarity index 100% rename from windows.bat rename to tacc/windows.bat diff --git a/IntelWindowsSetup.py b/tacc/windows.py similarity index 100% rename from IntelWindowsSetup.py rename to tacc/windows.py From 6b18a255ac2c1c74d47eff8f924f624252864851 Mon Sep 17 00:00:00 2001 From: amnp95 Date: Mon, 28 Apr 2025 11:55:13 -0700 Subject: [PATCH 13/17] add stampede3.sh script for building setup --- tacc/stampede3.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 tacc/stampede3.sh diff --git a/tacc/stampede3.sh b/tacc/stampede3.sh new file mode 100644 index 0000000..897f102 --- /dev/null +++ b/tacc/stampede3.sh @@ -0,0 +1,12 @@ +clear +ml phdf5 +rm -rf build + +export CC=icx +export CXX=icpx +export FC=ifx +export F77=ifx +export F90=ifx + +python3 IntelSetup.py build +# python3 IntelSetup.py install \ No newline at end of file From 4925893b5354475fae32232a8d8df752efa52bc6 Mon Sep 17 00:00:00 2001 From: amnp95 Date: Mon, 28 Apr 2025 11:56:01 -0700 Subject: [PATCH 14/17] update stampede3.sh to use 
stampede3.py for build process --- tacc/stampede3.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tacc/stampede3.sh b/tacc/stampede3.sh index 897f102..4a69195 100644 --- a/tacc/stampede3.sh +++ b/tacc/stampede3.sh @@ -8,5 +8,5 @@ export FC=ifx export F77=ifx export F90=ifx -python3 IntelSetup.py build -# python3 IntelSetup.py install \ No newline at end of file +python3 stampede3.py build +# python3 stampede3.py install \ No newline at end of file From 4f4d5b256351bdb47e0d6f4f5a7b2cca1d122054 Mon Sep 17 00:00:00 2001 From: amnp95 Date: Mon, 28 Apr 2025 12:11:21 -0700 Subject: [PATCH 15/17] refactor stampede3.sh to improve formatting and organization of dependency installation --- tacc/stampede3.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tacc/stampede3.sh b/tacc/stampede3.sh index 4a69195..246db4a 100644 --- a/tacc/stampede3.sh +++ b/tacc/stampede3.sh @@ -2,11 +2,18 @@ clear ml phdf5 rm -rf build +# build an environment and activate it (not necessary, but recommended) + +# installing the dependencies +pip install numpy==1.23.0 scipy==1.11.1 matplotlib geopy pyproj tqdm + + export CC=icx export CXX=icpx export FC=ifx export F77=ifx export F90=ifx + python3 stampede3.py build # python3 stampede3.py install \ No newline at end of file From 42be0ecc56b548448684cb620cb7890b3a4bb604 Mon Sep 17 00:00:00 2001 From: amnp95 Date: Mon, 28 Apr 2025 12:40:07 -0700 Subject: [PATCH 16/17] enhance stampede3.sh by adding missing dependencies and improving environment setup --- tacc/stampede3.sh | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/tacc/stampede3.sh b/tacc/stampede3.sh index 246db4a..01fda00 100644 --- a/tacc/stampede3.sh +++ b/tacc/stampede3.sh @@ -1,19 +1,46 @@ +# building environment for stampede3 should be done in an idev mode +# idev + +# after the idev command, you will be in a new shell clear ml phdf5 -rm -rf build +ml python/3.9.18 + # build an environment and activate 
it (not necessary, but recommended) +ENVNAME=ShakerMaker_env +# ENVNAME=ShakerMakerTestingEnv + +python3 -m venv $ENVNAME +source $ENVNAME/bin/activate + + # installing the dependencies -pip install numpy==1.23.0 scipy==1.11.1 matplotlib geopy pyproj tqdm +pip install numpy==1.23.0 scipy==1.11.1 matplotlib geopy pyproj tqdm h5py mpi4py + + +# install the ShakerMaker package +# clone the repository +git clone https://github.com/amnp95/ShakerMaker.git +cd ShakerMaker +cp tacc/stampede3.py . +# set the environment variables for the Intel compiler export CC=icx export CXX=icpx export FC=ifx export F77=ifx export F90=ifx - +# build the package python3 stampede3.py build -# python3 stampede3.py install \ No newline at end of file +python3 stampede3.py install + + + +# pip list : you should see the ShakerMaker package in the list of installed packages +pip list + + From 585a866b08ae4596582b37d09edfd8fd970d1f56 Mon Sep 17 00:00:00 2001 From: amnp95 Date: Mon, 28 Apr 2025 12:44:17 -0700 Subject: [PATCH 17/17] fix stampede3.sh by adding missing cd command to return to the previous directory --- tacc/stampede3.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tacc/stampede3.sh b/tacc/stampede3.sh index 01fda00..d20e7fa 100644 --- a/tacc/stampede3.sh +++ b/tacc/stampede3.sh @@ -42,5 +42,6 @@ python3 stampede3.py install # pip list : you should see the ShakerMaker package in the list of installed packages pip list +cd ..