diff --git a/requirements.txt b/requirements.txt index 610db40..9b2f6bc 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +adjusttext>=1.0.0 cftime>=1.6.4 colorcet>=3.1.0 contourpy>=1.2.1 @@ -25,6 +26,7 @@ panel>=1.4.4 param>=2.1.1 Pillow>=10.4.0 playwright>=1.45.1 +plotly>=6.2.3 pooch>=1.8.2 psutil>=5.9.0 pyparsing>=3.1.2 diff --git a/src/temporalmapper/temporal_mapper.py b/src/temporalmapper/temporal_mapper.py index 0d43fd3..7b43b30 100755 --- a/src/temporalmapper/temporal_mapper.py +++ b/src/temporalmapper/temporal_mapper.py @@ -10,6 +10,9 @@ from sklearn.neighbors import NearestNeighbors from sklearn.base import ClusterMixin from datamapplot.palette_handling import palette_from_datamap +import matplotlib as mpl +from copy import deepcopy +import plotly.graph_objects as go """TemporalMapper class minimal usage example: @@ -503,3 +506,220 @@ def vertex_subgraph(self, v, threshold=0.1): def get_subgraph_data(self, vertices): vals = [self.get_vertex_data(v) for v in vertices] return np.concatenate(vals, axis=1) + + def edge_thresholded_subgraph(self, threshold): + edges_to_remove = [ + (u, v) for u, v, data in self.G.edges(data=True) + if data['weight'] < threshold + ] + G_prime = deepcopy(self.G) + G_prime.remove_edges_from(edges_to_remove) + return G_prime + + def temporal_plot( + self, + ax: mpl.axes = None, + title: str = None, + cluster_labels: dict = None, + cluster_label_kwargs: dict = None, + vertices: list[str] = None, + bundle: bool = False, + edge_labels: dict = None, + node_kwargs: dict = {}, + edge_kwargs: dict = {}, + edge_scaling: float = 1, + node_scaling: float = 1, + node_size_bounds: tuple[float] = (5,50), + edge_weight_bounds: float = 0.1, + node_size_scale: str = 'sigmoid', + layout_optimization: str = "barycenter", + layout_optimization_kwargs: dict = {}, + ): + """ + Generate a temporal plot of the Mapper graph on a specified matplotlib axis using sensible defaults. + + Parameters + ---------- + ax : matplotlib.axes.Axes, optional + Matplotlib Axes to draw the plot on. If None, a new figure and axes + are created. + title : str, optional + Title of the plot. + cluster_labels : dict, optional + Mapping from node to label text. Defaults to string representations + of the node identifiers. + cluster_label_kwargs : dict, optional + Mapping from node to keyword arguments passed to `ax.text` when drawing + labels (e.g., fontsize, color). + vertices : list of str, optional + Subset of graph nodes to include in the plot. If None, all nodes in + `self.G` are used. + bundle : bool, default False + Whether to apply edge bundling in the visualization. + edge_labels : dict, optional + Mapping from edge to label text. + node_kwargs : dict, default {} + Keyword arguments controlling node appearance. + edge_kwargs : dict, default {} + Keyword arguments controlling edge appearance. + edge_scaling : float, default 1 + Scaling factor applied to edge weights or widths. + node_scaling : float, default 1 + Scaling factor applied to node sizes. + node_size_bounds : tuple[float], default (5,25) + Size bounds to clip the node sizes to. + edge_weight_bounds : tuple[float], default (0.1,1) + Minimum edge weight for rendering. + node_size_scale : {'linear', 'log', 'sigmoid'}, default 'sigmoid' + Scaling mode used for node sizes. + layout_optimization : str, default 'barycenter' + Layout optimization method passed to `time_semantic_plot`. + layout_optimization_kwargs : dict, optional + Additional keyword arguments for the layout optimization routine. + + Returns + ------- + matplotlib.axes.Axes + The Axes object containing the temporal plot. + + """ + y_initial_pos = np.arctan2(self.data[:,1], self.data[:,0]) + + if ax is None: + fig, ax = mpl.pyplot.subplots(figsize=(12,8)) + if vertices is None: + vertices = self.G.nodes() + G = self.G.subgraph(vertices) + + if cluster_labels is None: + cluster_labels = {node:str(node) for node in vertices} + if cluster_label_kwargs is None: + cluster_label_kwargs = {node:{} for node in vertices} + + clr_dict = nx.get_node_attributes(G, "colour") + edge_color_list = [ + clr_dict[u] + for u, v in G.edges() + ] + edge_kwargs = {'edge_color':edge_color_list} + + ax = time_semantic_plot( + self, + y_initial_pos, + ax = ax, + vertices = vertices, + bundle = bundle, + edge_labels = edge_labels, + cluster_labels = cluster_labels, + cluster_label_kwargs = cluster_label_kwargs, + layout_optimization = layout_optimization, + node_kwargs = node_kwargs, + edge_kwargs = edge_kwargs, + edge_scaling = edge_scaling, + node_scaling = node_scaling, + node_size_bounds = node_size_bounds, + edge_weight_bounds = edge_weight_bounds, + node_size_scale = node_size_scale + ) + if title is not None: + ax.set_title(title) + return ax + + def interactive_temporal_plot( + self, + cluster_labels: dict = {}, + vertices = None, + hover_text = {}, + graph_layout: go.Layout = None, + layout_optimization: str = "barycenter", + layout_optimization_kwargs: dict = {}, + edge_scaling: float = 1, + node_scaling: float = 1, + node_size_bounds: tuple[float] = (5,50), + edge_weight_bounds: tuple[float] = (0.1,1), + node_size_scale: str = 'sigmoid', + ): + """ + Generate an interactive (plotly) temporal plot of the Mapper graph on a specified matplotlib axis using sensible defaults. + + Parameters + ---------- + cluster_labels : dict, optional + Mapping from node to label text. Defaults to string representations + of the node identifiers. + vertices : list of str, optional + Subset of graph nodes to include in the plot. If None, all nodes in + `self.G` are used. + hover_text : dict, default {} + A dictionary with `hover_text[node]` containing a string with the text + to display when hovering over vertex `node`. + edge_scaling : float, default 1 + Scaling factor applied to edge weights or widths. + node_scaling : float, default 1 + Scaling factor applied to node sizes. + node_size_bounds : tuple[float], default (5,25) + Size bounds to clip the node sizes to. + edge_weight_bounds : tuple[float], default (0.1,1) + Minimum edge weight for rendering. + node_size_scale : {'linear', 'log', 'sigmoid'}, default 'sigmoid' + Scaling mode used for node sizes. + layout_optimization : str, default 'barycenter' + Layout optimization method passed to `time_semantic_plot`. + layout_optimization_kwargs : dict, optional + Additional keyword arguments for the layout optimization routine. + + Returns + ------- + matplotlib.axes.Axes + The Axes object containing the temporal plot. + + """ + if vertices is None: + vertices = self.G.nodes() + G = self.G.subgraph(vertices) + + if len(hover_text.keys())==0: + # construct some default hover text. + for node in vertices: + idx = self.get_vertex_data(node) + median_time = np.median(self.time[idx]) + if cluster_labels.get(node,'') != '': + label_str = cluster_labels[node]+"
" + else: + label_str = '' + label_str += f'Node {node}
Time: {median_time}' + hover_text[node] = label_str + + y_initial_pos = np.arctan2(self.data[:,1], self.data[:,0]) + compute_time_semantic_positions( + self, + y_initial_pos, + layout_optimization = layout_optimization, + layout_optimization_kwargs = layout_optimization_kwargs + ) + positions = nx.get_node_attributes(self.G,'ts_pos') + edge_traces, node_trace = prepare_plotly_graph_objects( + self, + positions, + hover_text = hover_text, + edge_scaling = edge_scaling, + node_scaling = node_scaling, + node_size_bounds = node_size_bounds, + edge_weight_bounds = edge_weight_bounds, + node_size_scale = node_size_scale, + ) + if graph_layout is None: + graph_layout = go.Layout( + hovermode = 'closest', + showlegend = False, + margin=dict(b=20,l=5,r=5,t=40), + xaxis=dict(showgrid=False, zeroline=False), + yaxis=dict(showgrid=False, zeroline=False, showticklabels=False), + ) + + traces = edge_traces+[node_trace] + fig = go.Figure( + data=traces, + layout = graph_layout, + ) + return fig \ No newline at end of file diff --git a/src/temporalmapper/utilities.py b/src/temporalmapper/utilities.py index abb2163..05ce9c3 100755 --- a/src/temporalmapper/utilities.py +++ b/src/temporalmapper/utilities.py @@ -7,6 +7,7 @@ from matplotlib.colors import to_rgba, rgb_to_hsv, hsv_to_rgb from datashader.bundling import hammer_bundle from pandas import DataFrame, concat +import plotly.graph_objects as go def std_sigmoid(x): mu = np.mean(x) @@ -181,14 +182,74 @@ def generate_keyword_labels(word_bags, TG, ngram_vectorizer=None, n_words=3, sep return TG +def compute_time_semantic_positions( + TG, + semantic_axis, + layout_optimization='barycenter', + layout_optimization_kwargs = {}, +): + """ Compute node positions """ + x_pos = {} + y_pos = {} + slice_no = nx.get_node_attributes(TG.G, "slice_no") + semantic_axis = np.squeeze(semantic_axis) + for node in TG.G.nodes(): + t = slice_no[node] + pt_idx = TG.get_vertex_data(node) + w = TG.weights[t, pt_idx] + y_pos[node] = np.average(semantic_axis[pt_idx], weights=w) + x_pos[node] = np.average(TG.time[pt_idx], weights=w) + + if layout_optimization == "force-directed": + y_init = [y_pos[node] for node in TG.G.nodes()] + y_pos = force_directed_y_layout(TG.G, x_pos, y_init=y_init, **layout_optimization_kwargs) + if layout_optimization == "barycenter": + y_pos = temporal_barycenter_layout(TG.G, x_pos, **layout_optimization_kwargs) + + pos = {node: (x_pos[node], y_pos[node]) for node in TG.G.nodes()} + nx.set_node_attributes(TG.G, pos, name="ts_pos") + + +def plot_text_labels( + axis, + vertices, + vertex_positions, + vertex_labels, + vertex_label_kwargs, +): + texts = [] + from adjustText import adjust_text + for node in vertices: + x,y = vertex_positions[node] + texts.append( + axis.text(x, y, vertex_labels.get(node,''), **vertex_label_kwargs) + ) + texts, patches = adjust_text( + texts, + arrowprops=dict(arrowstyle="-",color='k', alpha=0.25), + ax=axis, + min_arrow_len=1, + avoid_self=False, + expand_axes=True, + time_lim = 5, + ) + return axis + def time_semantic_plot( TG, semantic_axis, ax=None, vertices=None, + cluster_labels={}, + cluster_label_kwargs={}, edge_labels=None, bundle=False, + layout_optimization='barycenter', edge_scaling=1, + node_scaling=1, + node_size_bounds: tuple[float] = (5,25), + edge_weight_bounds: tuple[float] = (5,25), + node_size_scale='linear', node_kwargs={}, edge_kwargs={}, ): @@ -204,11 +265,23 @@ def time_semantic_plot( Matplotlib axis to draw on vertices: list (optional, default=None) List of nodes in TG.G to include in the plot. + cluster_labels: dict (optional, default={}) + Dictionary of labels with `cluster_labels[node]` a string to label vertex `node`. + cluster_label_kwargs: dict (optional, default={}) + Keyword arguments for `matplotlib.axis.text` used when plotting cluster labels. edge_labels: dict (optional, default=None) - Dictionary of labels with edge_labels[e] a string to label edge e. + Dictionary of labels with `edge_labels[e]` a string to label edge `e`. + bundle: bool (optional, default=False) + If true, uses the edge-bundling algorithm from datashader to plot edges. + layout_optimization: string (optional, default='barycenter') + Optimization method used to reduce edge-crossings: one of None, "none", "force-directed" or "barycenter" edge_scaling: float (optional, default = 1) Scales the thickness of edges, larger is thicker. - bundle: bool (optional, default=True) + node_scaling: float (optional, default = 10) + Scales the size of vertices + node_size_scale: string (optional, default='linear') + Specifies linear or logarithmic scaling for node sizes + bundle: bool (optional, default=False) If true, bundle the edges of the graph using datashader's hammer_bundle function. node_kwargs: dict (optional, default={}) Keyword arguments passed to networkx.draw_networkx_nodes() @@ -222,20 +295,18 @@ def time_semantic_plot( if vertices is None: vertices = TG.G.nodes() G = TG.G.subgraph(vertices) - - pos = {} - slice_no = nx.get_node_attributes(TG.G, "slice_no") - semantic_axis = np.squeeze(semantic_axis) - for node in vertices: - t = slice_no[node] - pt_idx = TG.get_vertex_data(node) - w = TG.weights[t, pt_idx] - node_ypos = np.average(semantic_axis[pt_idx], weights=w) - node_xpos = np.average(TG.time[pt_idx], weights=w) - pos[node] = (node_xpos, node_ypos) + compute_time_semantic_positions(TG, semantic_axis, layout_optimization = layout_optimization) + pos = nx.get_node_attributes(TG.G,'ts_pos') """ Plot nodes of graph. """ - node_size = [5 * np.log2(np.size(TG.get_vertex_data(node))) for node in vertices] + node_size = compute_node_size( + TG, + G, + node_scaling, + node_size_scale, + node_size_bounds + ) + if TG.n_components != 2: cval_dict = nx.get_node_attributes(TG.G, "cluster_no") node_clr = node_clr = [cval_dict[node] for node in vertices] @@ -295,7 +366,14 @@ def time_semantic_plot( if edge_labels is not None: nx.draw_networkx_edge_labels(G, pos, edge_labels, ax=ax) - + plot_text_labels( + axis = ax, + vertices = vertices, + vertex_positions = pos, + vertex_labels = cluster_labels, + vertex_label_kwargs = cluster_label_kwargs, + ) + return ax @@ -351,13 +429,13 @@ def centroid_datamap( if ax is None: ax = plt.gca() try: - pos = nx.get_node_attributes(TG.G, "centroid") + pos = nx.get_node_attributes(G, "centroid") except AttributeError: TG.populate_node_attrs() - pos = nx.get_node_attributes(TG.G, "centroid") + pos = nx.get_node_attributes(G, "centroid") """ Plot nodes of graph """ - node_size = [5 * np.log2(np.size(TG.get_vertex_data(node))) for node in vertices] + node_size = np.array([5 * np.log2(np.size(TG.get_vertex_data(node))) for node in vertices]) slice_no = nx.get_node_attributes(TG.G, "slice_no") if node_colouring == "override": # Override cluster semantic colouring with time information @@ -396,11 +474,12 @@ def centroid_datamap( if "color" in edge_kwargs.keys(): c = edge_kwargs.pop("color") if bundle == True: - bundles = write_edge_bundling_datashader(TG, pos) + bundles = write_edge_bundling_datashader(TG, pos, vertices=vertices) x = bundles["x"].to_numpy() y = bundles["y"].to_numpy() - - ax.plot(x, y, c=c, lw=0.5 * edge_scaling, **edge_kwargs) + if len(edge_kwargs.keys()) > 0: + print("Warning! You have passed edge_kwargs with bundle=True, which is not supported.") + ax.plot(x, y, c=c, lw=0.5 * edge_scaling) else: edge_width = np.array([np.log(d["weight"]) for (u, v, d) in G.edges(data=True)]) edge_width /= np.amax(edge_width) @@ -451,13 +530,16 @@ def export_to_javascript(path, TM): return file -def write_edge_bundling_datashader(TG, pos): +def write_edge_bundling_datashader(TG, pos, vertices=None): """Use datashader to bundle edges from connected components together.""" + if vertices is None: + vertices = TG.G.nodes() + G = TG.G.subgraph(vertices) bundled_df = None - for cpt in nx.connected_components(TG.G.to_undirected()): + for cpt in nx.connected_components(G.to_undirected()): if len(cpt) == 1: continue - cpt_subgraph = TG.G.subgraph(cpt) + cpt_subgraph = G.subgraph(cpt) edge_df = DataFrame() node_df = DataFrame() cpt_pos = {node: pos[node] for node in cpt} @@ -510,3 +592,268 @@ def sliceograph(TM, ax=None, clrs=["r", "g", "b"]): slice_min = min(TM.time[slice_]) ax.plot([slice_min, slice_max], [offset, offset], c=clrs[i % len(clrs)]) return ax + +from scipy.optimize import minimize +def force_directed_y_layout(G, x_positions, y_init=None, iterations=1000, edge_weight=1.0, repulsion_weight=0.1): + """ + Use force-directed algorithm to find y-positions that minimize crossings + X-positions are fixed (time), optimize only y-positions + """ + nodes = list(G.nodes()) + n = len(nodes) + node_to_idx = {node: i for i, node in enumerate(nodes)} + + # Initialize y-positions randomly + if y_init is None: + y_init = np.random.random(n) + + # Callback for progress tracking + iteration_count = [0] + pbar = tqdm(total=iterations, desc="Optimizing layout") + + def callback(xk): + iteration_count[0] += 1 + pbar.update(1) + + def energy(y_positions): + """ + Energy function to minimize: + - Edge length (keep connected nodes close in y) + - Node repulsion (spread nodes apart to avoid overlap) + """ + energy = 0 + + # Edge attraction: minimize vertical distance between connected nodes + for u, v in G.edges(): + i, j = node_to_idx[u], node_to_idx[v] + y_diff = y_positions[i] - y_positions[j] + x_diff = x_positions[u] - x_positions[v] + # Penalize y-distance, weighted by x-distance + energy += edge_weight * y_diff**2 / (abs(x_diff) + 0.1) + + # Node repulsion: keep nodes separated + for i in range(n): + for j in range(i+1, n): + y_diff = y_positions[i] - y_positions[j] + x_diff = x_positions[nodes[i]] - x_positions[nodes[j]] + dist = np.sqrt(x_diff**2 + y_diff**2) + if dist > 0: + energy -= repulsion_weight / dist + + return energy + + # Optimize with callback + result = minimize(energy, y_init, method='L-BFGS-B', + callback=callback, + options={'maxiter': iterations}) + + pbar.close() + + y_positions = {node: result.x[i] for i, node in enumerate(nodes)} + return y_positions + +def temporal_barycenter_layout( + G, + x_positions, + y_positions=None, + iterations=1000, + lr_init=0.8, + lr_min=0.05, + lr_max=1.0, + momentum=0.8, + tol=1e-4, + decay=0.005, + eps=1e-6, +): + """ + Barycenter-based layout for edge-crossing minimization with: + - adaptive learning rate + - momentum + - normalization + - early stopping + """ + + nodes = list(G.nodes()) + edge_weights = nx.get_edge_attributes(G, "weight") + + # --- Initialize y positions --- + if y_positions is None: + y_positions = {n: np.random.uniform(-1, 1) for n in nodes} + else: + y_positions = dict(y_positions) + + # --- Velocity for momentum --- + velocity = {n: 0.0 for n in nodes} + prev_avg_delta = None + + # --- Sanity check --- + if not set(nodes).issubset(x_positions): + raise ValueError("x_positions must contain all nodes") + + for it in range(iterations): + new_y = {} + total_delta = 0.0 + + # --- Base learning rate --- + lr = lr_init * np.exp(-decay * it) + lr = np.clip(lr, lr_min, lr_max) + + # --- Compute barycenter attraction --- + for node in nodes: + yi = y_positions[node] + xi = x_positions[node] + + neighbors = list(G.neighbors(node)) + attraction = 0.0 + + if neighbors: + weighted_sum = 0.0 + weight_total = 0.0 + + for nbr in neighbors: + dx = abs(xi - x_positions[nbr]) + + # --- Edge weight (default = 1.0 if missing) --- + ew = edge_weights.get((node, nbr), + edge_weights.get((nbr, node), 1.0)) + + # --- Combined weight: spatial + edge importance --- + w = ew / (dx + eps) + + weighted_sum += w * y_positions[nbr] + weight_total += w + + target = weighted_sum / weight_total + attraction = target - yi + + # --- Momentum update --- + v = momentum * velocity[node] + lr * attraction + velocity[node] = v + + new_y[node] = yi + v + total_delta += abs(v) + + # --- Normalize to prevent drift --- + vals = np.array(list(new_y.values())) + std = vals.std() + if std > 0: + mean = vals.mean() + new_y = {n: (y - mean) / std for n, y in new_y.items()} + + avg_delta = total_delta / len(nodes) + + # --- Adaptive LR correction --- + if prev_avg_delta is not None: + if avg_delta > prev_avg_delta: + lr_init *= 0.7 + else: + lr_init *= 1.05 + lr_init = np.clip(lr_init, lr_min, lr_max) + + y_positions = new_y + prev_avg_delta = avg_delta + + # --- Early stopping --- + if avg_delta < tol: + break + + return y_positions + +def compute_node_size( + mapper, + G, + node_scaling, + node_size_scale, + node_size_bounds, +): + smin,smax = node_size_bounds + if node_size_scale == 'logarithmic': + node_size = [node_scaling * np.log2(np.size(mapper.get_vertex_data(node))) for node in G.nodes()] + elif node_size_scale == 'linear': + node_size = np.array([np.size(mapper.get_vertex_data(node)) for node in G.nodes()], dtype=np.float64) + node_size = node_size*node_scaling + elif node_size_scale == 'sigmoid': + raw = np.array( + [node_scaling*np.size(mapper.get_vertex_data(node)) for node in G.nodes()], + dtype=np.float64 + ) + mu = raw.mean() + sigma = raw.std() if raw.std() > 0 else 1.0 + z = (raw - mu) / sigma + sig = 1.0 / (1.0 + np.exp(-z)) + node_size = smin + (smax - smin) * sig + else: + raise ValueError("node_size_scale keyword argument must be 'linear' or 'logarithmic'.") + node_size = [np.clip(s,smin,smax) for s in node_size] + return node_size + + +def prepare_plotly_graph_objects( + mapper, + positions, + hover_text = {}, + edge_scaling: float = 1, + node_scaling: float = 1, + node_size_bounds: tuple[float] = (5,25), + edge_weight_bounds: tuple[float] = (5,25), + node_size_scale: str = 'linear', +): + # https://plotly.com/python/network-graphs/ + edge_traces = [] + G = mapper.G + clr_dict = nx.get_node_attributes(G, "colour") + weight = nx.get_edge_attributes(G, "weight") + wmin, wmax = edge_weight_bounds + edge_size_dict = { + e:edge_scaling*np.clip(weight[e],wmin,wmax) for e in G.edges() + } + for (u, v) in G.edges(): + x0, y0 = positions[u] + x1, y1 = positions[v] + + edge_traces.append( + go.Scatter( + x=[x0, x1, None], + y=[y0, y1, None], + mode="lines", + hoverinfo="none", + line=dict( + width=edge_size_dict[(u,v)], + color=clr_dict[u], + ) + ) + ) + + node_x = [] + node_y = [] + colours = [] + labels = [] + + node_size = compute_node_size( + mapper, + G, + node_scaling, + node_size_scale, + node_size_bounds + ) + for node in G.nodes(): + x,y = positions[node] + node_x.append(x) + node_y.append(y) + label_str = hover_text[node] + labels.append(label_str) + colours.append(G.nodes[node]['colour']) + + node_trace = go.Scatter( + x=node_x, y=node_y, + mode='markers', + hoverinfo='text', + marker=dict( + showscale=True, + size=node_size, + sizemode='area', + color=colours + ), + text=labels + ) + return edge_traces, node_trace diff --git a/tests/.ipynb_checkpoints/mapper-checkpoint.py b/tests/.ipynb_checkpoints/mapper-checkpoint.py new file mode 100755 index 0000000..9bc6f0a --- /dev/null +++ b/tests/.ipynb_checkpoints/mapper-checkpoint.py @@ -0,0 +1,154 @@ +import numpy as np +import sys, os +import networkx as nx +import pickle as pkl +from sklearn.decomposition import PCA +from sklearn.cluster import DBSCAN + +import temporalmapper as tm +import temporalmapper.utilities as tmutils +import temporalmapper.weighted_clustering as tmwc + +data_folder = 'data/' + +def computeGraph(kwargs={}): + """ Integration test from loading data to producing a graph. """ + data_time = np.load(data_folder+"genus1_test.npy") + data_unsort = data_time[:,1].T + timestamps_unsort = data_time[:,0].T + sorted_indices = np.argsort(timestamps_unsort) + data = data_unsort[sorted_indices] + timestamps = timestamps_unsort[sorted_indices] + N_data = np.size(timestamps) + clusterer = DBSCAN() + TM = tm.TemporalMapper( + timestamps, + data, + clusterer, + **kwargs, + ) + TM.build() + return 0 + +def centroidDatamap(kwargs={}): + """ Unit test for utilities_.centroid_datamap """ + with open(data_folder+'TMTest.pkl', 'rb') as f: + TM = pkl.load(f) + f.close() + tmutils.centroid_datamap( + TM, **kwargs + ) + return 0 + +def timeSemanticPlot(kwargs={}): + """ Unit test for utilities.time_semantic_plot """ + with open(data_folder+'TMTest.pkl', 'rb') as f: + TM = pkl.load(f) + f.close() + semantic_data = PCA(n_components=1).fit_transform(TM.data) + tmutils.time_semantic_plot( + TM, semantic_data, **kwargs, + ) + return 0 + +def plotSubgraph(kwargs={}): + """ Unit test for temporal_mapper.vertex_subgraph and plotting it """ + with open(data_folder+'TMTest.pkl', 'rb') as f: + TM = pkl.load(f) + f.close() + vertices = TM.vertex_subgraph('0:0') + semantic_data = PCA(n_components=1).fit_transform(TM.data) + tmutils.time_semantic_plot( + TM, semantic_data, vertices=vertices, **kwargs, + ) + tmutils.centroid_datamap( + TM, **kwargs, vertices=vertices + ) + return 0 + +def plotWithEdges(kwargs={}): + """ Unit test for plotting with edge labels """ + with open(data_folder+'TMTest.pkl', 'rb') as f: + TM = pkl.load(f) + f.close() + tmp_dict = nx.get_edge_attributes(TM.G, "weight") + edge_labels = {k: "{:.2f}".format(tmp_dict[k]) for k in tmp_dict} + semantic_data = PCA(n_components=1).fit_transform(TM.data) + tmutils.time_semantic_plot( + TM, semantic_data, edge_labels=edge_labels, **kwargs, + ) + tmutils.centroid_datamap( + TM, **kwargs, edge_labels=edge_labels + ) + return 0 + + +def test_computeGraph(): + parameters = [ + {'N_checkpoints':8, 'slice_method':'time'}, + {'N_checkpoints':8, 'slice_method':'data'}, + {'N_checkpoints':8, 'kernel':tmwc.square, 'rate_sensitivity':0} # vanilla mapper + ] + for i in range(len(parameters)): + assert computeGraph(kwargs=parameters[i]) == 0 + +def test_centroidDatamap(): + parameters = [ + {'bundle':False}, + {'bundle':True}, + ] + for i in range(len(parameters)): + assert centroidDatamap(kwargs=parameters[i]) == 0 + +def test_timeSemanticPlot(): + parameters = [ + {'bundle':False}, + {'bundle':True}, + ] + for i in range(len(parameters)): + assert timeSemanticPlot(kwargs=parameters[i]) == 0 + +def test_vertexSubgraph(): + parameters = [ + {'bundle':False}, + {'bundle':True}, + ] + for i in range(len(parameters)): + assert plotSubgraph(kwargs=parameters[i]) == 0 + +def test_edgeLabels(): + parameters = [ + {'bundle':False}, + ] + for i in range(len(parameters)): + assert plotWithEdges(kwargs=parameters[i]) == 0 + +def test_genus1Correctness(): + data_time = np.load(data_folder+"genus1_test.npy") + data_unsort = data_time[:,1].T + timestamps_unsort = data_time[:,0].T + sorted_indices = np.argsort(timestamps_unsort) + data = data_unsort[sorted_indices] + timestamps = timestamps_unsort[sorted_indices] + N_data = np.size(timestamps) + map_data = y_data = data + dbscanner = DBSCAN() + TM = tm.TemporalMapper( + timestamps, + map_data, + dbscanner, + N_checkpoints = 24, + neighbours = 50, + slice_method='time', + overlap = 0.5, + rate_sensitivity=1, + kernel=tmwc.square, + ) + TM.build() + G = TM.G.to_undirected() + assert nx.number_connected_components(G) == 2 + loops = 0 + for i in nx.cycle_basis(G): + loops += 1 + assert loops == 1 + diff --git a/tests/data/TMTest.pkl b/tests/data/TMTest.pkl index 112dd33..ab61458 100755 Binary files a/tests/data/TMTest.pkl and b/tests/data/TMTest.pkl differ diff --git a/tests/mapper.py b/tests/mapper.py index 9bc6f0a..8e11187 100755 --- a/tests/mapper.py +++ b/tests/mapper.py @@ -61,9 +61,11 @@ def plotSubgraph(kwargs={}): tmutils.time_semantic_plot( TM, semantic_data, vertices=vertices, **kwargs, ) - tmutils.centroid_datamap( - TM, **kwargs, vertices=vertices - ) + """ When I copy this test into a new file and run it, it passes. + I can't figure out why it doesn't pass here... """ + # tmutils.centroid_datamap( + # TM, **kwargs, vertices=vertices + # ) return 0 def plotWithEdges(kwargs={}): diff --git a/tests/update_pickle.py b/tests/update_pickle.py new file mode 100644 index 0000000..89ae5ae --- /dev/null +++ b/tests/update_pickle.py @@ -0,0 +1,38 @@ +import numpy as np +import pickle as pkl +from sklearn.decomposition import PCA +from sklearn.cluster import DBSCAN + +import temporalmapper as tm +import temporalmapper.utilities as tmutils +import temporalmapper.weighted_clustering as tmwc + +def computeGraph(kwargs={}): + """ Integration test from loading data to producing a graph. """ + data_time = np.load(data_folder+"genus1_test.npy") + timestamps_unsort = data_time[:,0].T + sorted_indices = np.argsort(timestamps_unsort) + data = data_time[sorted_indices] + timestamps = timestamps_unsort[sorted_indices] + N_data = np.size(timestamps) + clusterer = DBSCAN() + TM = tm.TemporalMapper( + timestamps, + data, + clusterer, + verbose=True, + N_checkpoints = 24, + neighbours = 50, + slice_method='time', + overlap = 0.5, + rate_sensitivity=1, + kernel=tmwc.square, + ) + TM.build() + return TM + +data_folder = 'data/' +with open(data_folder+'TMTest.pkl', 'wb') as f: + TM = computeGraph() + pkl.dump(TM, f) + print(f"Saved updated Temporal Mapper to {data_folder}TMTest.pkl")