diff --git a/phenograph/cluster.py b/phenograph/cluster.py
index 5107614..4548eb6 100644
--- a/phenograph/cluster.py
+++ b/phenograph/cluster.py
@@ -287,11 +287,12 @@ def cluster(
         print("Setting directed=False because prune=True")
         directed = False
 
+    kernelargs = {}
     if n_jobs == 1:
         kernel = jaccard_kernel
     else:
+        kernelargs["n_jobs"] = n_jobs
         kernel = parallel_jaccard_kernel
-    kernelargs = {}
 
     # Start timer
     tic = time.time()
diff --git a/phenograph/core.py b/phenograph/core.py
index 6f84fc8..d49f323 100644
--- a/phenograph/core.py
+++ b/phenograph/core.py
@@ -136,14 +136,30 @@ def calc_jaccard(i, idx):
     return idx[i], coefficients
 
 
-def parallel_jaccard_kernel(idx):
+def parallel_jaccard_kernel(idx, n_jobs=-1):
     """Compute Jaccard coefficient between nearest-neighbor sets in parallel
 
-    :param idx: n-by-k integer matrix of k-nearest neighbors
-    :return (i, j, s): row indices, column indices, and nonzero values for a sparse adjacency matrix
+    Parameters
+    ----------
+    idx
+        n-by-k integer matrix of k-nearest neighbors
+    n_jobs
+        Number of concurrently running workers. If 1 is given, no parallelism is
+        used. If set to -1, all CPUs are used. For n_jobs below -1, `n_cpus + 1 + n_jobs`
+        are used.
+
+    Returns
+    -------
+    i, j, s
+        row indices, column indices, and nonzero values for a sparse adjacency matrix
     """
+    if n_jobs == -1:
+        n_jobs = len(os.sched_getaffinity(0))
+    if n_jobs < -1:
+        n_jobs = len(os.sched_getaffinity(0)) + 1 + n_jobs
+
     n = len(idx)
-    with closing(Pool()) as pool:
+    with closing(Pool(n_jobs)) as pool:
         jaccard_values = pool.starmap(calc_jaccard, zip(range(n), repeat(idx)))
 
     graph = sp.lil_matrix((n, n), dtype=float)
@@ -269,7 +285,6 @@ def get_modularity(msg):
     run = 0
     updated = 0
     while run - updated < 20 and run < max_runs and (time.time() - tic) < time_limit:
-
         # run community
         fout = open(filename + ".tree", "w")
         args = [
@@ -291,7 +306,6 @@ def get_modularity(msg):
 
         # continue only if we've reached a higher modularity than before
        if q[-1] - Q > tol:
-
             Q = q[-1]
             updated = run
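
Not part of the patch: a minimal usage sketch of how the new n_jobs argument is expected to reach the parallel Jaccard kernel once the changes above are applied. The toy data, k=30, and n_jobs=4 below are illustrative assumptions, not values taken from the diff.

    # Hypothetical example, assuming the patched phenograph is installed.
    import numpy as np
    import phenograph

    data = np.random.rand(1000, 20)  # toy matrix: 1000 cells x 20 features

    # n_jobs=1 keeps the serial jaccard_kernel; any other value selects
    # parallel_jaccard_kernel, which now receives n_jobs via kernelargs.
    # n_jobs=-1 uses all CPUs; e.g. n_jobs=-2 on an 8-CPU machine resolves
    # to 8 + 1 + (-2) = 7 worker processes.
    communities, graph, Q = phenograph.cluster(data, k=30, n_jobs=4)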