diff --git a/.github/workflows/pr-test.yaml b/.github/workflows/pr-test.yaml index 45c8bb2..2cf0620 100644 --- a/.github/workflows/pr-test.yaml +++ b/.github/workflows/pr-test.yaml @@ -78,19 +78,5 @@ jobs: # coordinates must match exactly (deterministic tiling) gt_coordinates = np.load('/gt/test-wsi.npy') coordinates = np.load('/output/coordinates/test-wsi.npy') - assert len(gt_coordinates) == len(coordinates), f'Number of coordinates mismatch: {len(coordinates)} vs {len(gt_coordinates)} ❌' - x_gt, y_gt = gt_coordinates['x'], gt_coordinates['y'] - x, y = coordinates['x'], coordinates['y'] - assert_array_equal(x, x_gt), 'x coordinates mismatch ❌' - assert_array_equal(y, y_gt), 'y coordinates mismatch ❌' - tile_level_gt = gt_coordinates['tile_level'] - tile_level = coordinates['tile_level'] - assert_array_equal(tile_level, tile_level_gt), 'tile_level mismatch ❌' - tile_size_gt = gt_coordinates['tile_size_resized'] - tile_size = coordinates['tile_size_resized'] - assert_array_equal(tile_size, tile_size_gt), 'tile_size_resized mismatch ❌' - resize_factor_gt = gt_coordinates['resize_factor'] - resize_factor = coordinates['resize_factor'] - assert_array_equal(resize_factor, resize_factor_gt), 'resize_factor mismatch ❌' - print("All coordinate checks passed ✅") + assert_array_equal(coordinates, gt_coordinates, err_msg='Coordinates mismatch') PY" diff --git a/README.md b/README.md index b42f9c7..400054e 100644 --- a/README.md +++ b/README.md @@ -90,11 +90,12 @@ This file stores a numpy array of shape `(num_tiles, 8)` containing the followin 1. **`x`**: x-coordinate of the tile at level 0 2. **`y`**: y-coordinate of the tile at level 0 3. **`contour_index`**: index of the contour containing the tile (useful for masking non-tissue content) -4. **`tile_size_resized`**: size of the tile at the extraction level, which may differ from the requested tile size if the target spacing was not available -5. **`tile_level`**: pyramid level at which the tile was extracted -6. 
**`resize_factor`**: ratio between `tile_size_resized` and the requested tile size, useful for resizing when loading the tile -7. **`tile_size_lv0`**: tile size scaled to the slide's level 0 -8. **`target_spacing`**: spacing at which the user requested the tile (in microns per pixel) +4. **`target_tile_size`**: requested tile size (in pixels) +5. **`target_spacing`**: spacing at which the user requested the tile (in microns per pixel) +6. **`tile_level`**: pyramid level at which the tile was extracted +7. **`resize_factor`**: ratio between `tile_size_resized` and the requested tile size (`target_tile_size`), useful for resizing when loading the tile +8. **`tile_size_resized`**: size of the tile at the extraction level (`tile_level`), which may differ from the requested tile size (`target_tile_size`) if the target spacing was not available +9. **`tile_size_lv0`**: tile size scaled to the slide's level 0 ### Visualization (optional) diff --git a/hs2p/sampling.py b/hs2p/sampling.py index d1adb59..4ce0b6f 100644 --- a/hs2p/sampling.py +++ b/hs2p/sampling.py @@ -123,7 +123,7 @@ def process_slide( contour_indices=contour_indices, target_spacing=cfg.tiling.params.spacing, tile_level=tile_level, - tile_size=cfg.tiling.params.tile_size, + target_tile_size=cfg.tiling.params.tile_size, resize_factor=resize_factor, tile_size_lv0=tile_size_lv0, save_path=coordinates_path, @@ -172,7 +172,7 @@ def process_slide( contour_indices=contour_indices, target_spacing=cfg.tiling.params.spacing, tile_level=tile_level, - tile_size=cfg.tiling.params.tile_size, + target_tile_size=cfg.tiling.params.tile_size, resize_factor=resize_factor, tile_size_lv0=tile_size_lv0, save_path=coordinates_path, diff --git a/hs2p/tiling.py b/hs2p/tiling.py index 816b2ca..9279a79 100644 --- a/hs2p/tiling.py +++ b/hs2p/tiling.py @@ -78,7 +78,7 @@ def process_slide( contour_indices=contour_indices, target_spacing=cfg.tiling.params.spacing, tile_level=tile_level, - tile_size=cfg.tiling.params.tile_size, + 
target_tile_size=cfg.tiling.params.tile_size, resize_factor=resize_factor, tile_size_lv0=tile_size_lv0, save_path=coordinates_path, diff --git a/hs2p/wsi/__init__.py b/hs2p/wsi/__init__.py index 463ec16..233551b 100644 --- a/hs2p/wsi/__init__.py +++ b/hs2p/wsi/__init__.py @@ -81,12 +81,12 @@ def extract_coordinates( ) tolerance = tiling_params.tolerance starting_spacing = wsi.spacings[0] - desired_spacing = tiling_params.spacing - if desired_spacing < starting_spacing: - relative_diff = abs(starting_spacing - desired_spacing) / desired_spacing + target_spacing = tiling_params.spacing + if target_spacing < starting_spacing: + relative_diff = abs(starting_spacing - target_spacing) / target_spacing if relative_diff > tolerance: raise ValueError( - f"Desired spacing ({desired_spacing}) is smaller than the whole-slide image starting spacing ({starting_spacing}) and does not fall within tolerance ({tolerance})" + f"Desired spacing ({target_spacing}) is smaller than the whole-slide image starting spacing ({starting_spacing}) and does not fall within tolerance ({tolerance})" ) ( contours, @@ -141,12 +141,12 @@ def sample_coordinates( ) tolerance = tiling_params.tolerance starting_spacing = wsi.spacings[0] - desired_spacing = tiling_params.spacing - if desired_spacing < starting_spacing: - relative_diff = abs(starting_spacing - desired_spacing) / desired_spacing + target_spacing = tiling_params.spacing + if target_spacing < starting_spacing: + relative_diff = abs(starting_spacing - target_spacing) / target_spacing if relative_diff > tolerance: raise ValueError( - f"Desired spacing ({desired_spacing}) is smaller than the whole-slide image starting spacing ({starting_spacing}) and does not fall within tolerance ({tolerance})" + f"Desired spacing ({target_spacing}) is smaller than the whole-slide image starting spacing ({starting_spacing}) and does not fall within tolerance ({tolerance})" ) ( contours, @@ -264,7 +264,7 @@ def save_coordinates( contour_indices: list[int], 
target_spacing: float, tile_level: int, - tile_size: int, + target_tile_size: int, resize_factor: float, tile_size_lv0: int, save_path: Path, @@ -272,16 +272,17 @@ def save_coordinates( x = [x for x, _ in coordinates] # defined w.r.t level 0 y = [y for _, y in coordinates] # defined w.r.t level 0 ntile = len(x) - tile_size_resized = int(round(tile_size * resize_factor, 0)) + tile_size_resized = int(round(target_tile_size * resize_factor, 0)) dtype = [ ("x", int), ("y", int), ("contour_index", int), - ("tile_size_resized", int), + ("target_tile_size", int), + ("target_spacing", float), ("tile_level", int), ("resize_factor", float), + ("tile_size_resized", int), ("tile_size_lv0", int), - ("target_spacing", float), ] data = np.zeros(ntile, dtype=dtype) for i in range(ntile): @@ -289,11 +290,12 @@ def save_coordinates( x[i], y[i], contour_indices[i], - tile_size_resized, + target_tile_size, + target_spacing, tile_level, resize_factor, + tile_size_resized, tile_size_lv0, - target_spacing, ) data_arr = np.array(data) np.save(save_path, data_arr) diff --git a/hs2p/wsi/utils.py b/hs2p/wsi/utils.py index c1b95be..ee2e8e9 100644 --- a/hs2p/wsi/utils.py +++ b/hs2p/wsi/utils.py @@ -3,11 +3,11 @@ class HasEnoughTissue(object): - def __init__(self, contour, contour_holes, tissue_mask, tile_size, tile_spacing, resize_factor, seg_spacing, spacing_at_level_0, pct=0.01): + def __init__(self, contour, contour_holes, tissue_mask, target_tile_size, tile_spacing, resize_factor, seg_spacing, spacing_at_level_0, pct=0.01): self.cont = contour self.holes = contour_holes self.mask = tissue_mask // 255 - self.tile_size = tile_size + self.target_tile_size = target_tile_size self.tile_spacing = tile_spacing self.resize_factor = resize_factor self.seg_spacing = seg_spacing @@ -18,12 +18,12 @@ def __init__(self, contour, contour_holes, tissue_mask, tile_size, tile_spacing, # where contour and tissue masks are defined target_spacing = self.tile_spacing * self.resize_factor scale = 
self.seg_spacing / target_spacing - self.downsampled_tile_size = int(round(self.tile_size * 1 / scale, 0)) + self.downsampled_tile_size = int(round(self.target_tile_size * 1 / scale, 0)) assert ( self.downsampled_tile_size > 0 ), "downsampled tile_size is equal to zero, aborting; please consider using a smaller seg_params.downsample parameter" - self.tile_size_resized = int(round(tile_size * resize_factor,0)) + self.tile_size_resized = int(round(target_tile_size * resize_factor,0)) # precompute the combined tissue mask self.precomputed_mask = self._precompute_tissue_mask() diff --git a/hs2p/wsi/wsi.py b/hs2p/wsi/wsi.py index 0b7e4bc..7c9e2b4 100644 --- a/hs2p/wsi/wsi.py +++ b/hs2p/wsi/wsi.py @@ -45,9 +45,9 @@ class TilingParameters(NamedTuple): """ Parameters for tiling. """ - spacing: float # spacing at which to tile the slide, in microns per pixel - tolerance: float # for matching the spacing, deciding how much spacing can deviate from those specified in the slide metadata. - tile_size: int # size of the tiles to extract, in pixels + target_spacing: float # spacing at which to tile the slide, in microns per pixel + tolerance: float # for matching the target_spacing, deciding how much target_spacing can deviate from those specified in the slide metadata. + target_tile_size: int # size of the tiles to extract, in pixels overlap: float # overlap between tiles min_tissue_percentage: float # minimum percentage of tissue required to keep a tile when no sampling is performed drop_holes: bool # whether to drop tiles that fall within holes @@ -281,8 +281,7 @@ def load_segmentation( """ Load and process a segmentation mask for a whole slide image. - This method ensures that the segmentation mask and the slide have at least one - common spacing, determines the best level for the given downsample factor, and + This method determines the best level for the given downsample factor, and processes the segmentation mask to create a binary mask. 
Args: @@ -478,10 +477,10 @@ def get_tile_coordinates( Args: tiling_params (NamedTuple): Parameters for tiling, including: - - spacing (float): Desired spacing of the tiles. - - tolerance (float): Tolerance for matching the spacing, deciding how much - spacing can deviate from those specified in the slide metadata. - - tile_size (int): Desired size of the tiles at the target spacing. + - target_spacing (float): Desired spacing of the tiles. + - tolerance (float): Tolerance for matching the target_spacing, deciding how much + target_spacing can deviate from those specified in the slide metadata. + - target_tile_size (int): Desired size of the tiles at the target spacing. - overlap (float, optional): Overlap between adjacent tiles. Defaults to 0.0. - "drop_holes" (bool): If True, tiles falling within a hole will be excluded. Defaults to False. - "tissue_percentage" (dict[str, float]): Minimum amount pixels covered with tissue required for a tile for a given annotation. @@ -522,9 +521,9 @@ def get_tile_coordinates( ) = self.process_contours( contours, holes, - spacing=tiling_params.spacing, + target_spacing=tiling_params.spacing, tolerance=tiling_params.tolerance, - tile_size=tiling_params.tile_size, + target_tile_size=tiling_params.tile_size, overlap=tiling_params.overlap, drop_holes=tiling_params.drop_holes, use_padding=tiling_params.use_padding, @@ -611,8 +610,8 @@ def detect_contours( Args: target_spacing (float): Desired spacing at which tiles should be extracted. - tolerance (float): Tolerance for matching the spacing, deciding how much - spacing can deviate from those specified in the slide metadata. + tolerance (float): Tolerance for matching the target_spacing, deciding how much + target_spacing can deviate from those specified in the slide metadata. filter_params (NamedTuple): A NamedTuple containing filtering parameters: - "a_t" (int): Minimum area threshold for foreground contours. - "a_h" (int): Minimum area threshold for holes within contours. 
@@ -760,9 +759,9 @@ def process_contours( self, contours, holes, - spacing: float, + target_spacing: float, tolerance: float, - tile_size: int, + target_tile_size: int, overlap: float, drop_holes: bool, use_padding: bool, @@ -777,10 +776,10 @@ def process_contours( Args: contours (list): List of contours representing tissue blobs in the wsi. holes (list): List of tissue holes in each contour. - spacing (float): Desired spacing for tiling. - tolerance (float): Tolerance for matching the spacing, deciding how much - spacing can deviate from those specified in the slide metadata. - tile_size (int): Desired tile size in pixels. + target_spacing (float): Desired spacing for tiling. + tolerance (float): Tolerance for matching the target_spacing, deciding how much + target_spacing can deviate from those specified in the slide metadata. + target_tile_size (int): Desired tile size in pixels. overlap (float): Overlap between adjacent tiles. drop_holes (bool): Whether to drop tiles that fall within holes. use_padding (bool): Whether to pad the tiles to ensure full coverage. @@ -806,9 +805,9 @@ def process_single_contour(i): return self.process_contour( contours[i], holes[i], - spacing, + target_spacing, tolerance, - tile_size, + target_tile_size, overlap, drop_holes, use_padding, @@ -860,9 +859,9 @@ def process_contour( self, contour, contour_holes, - spacing: float, + target_spacing: float, tolerance: float, - tile_size: int, + target_tile_size: int, overlap: float, drop_holes: bool, use_padding: bool, @@ -874,10 +873,10 @@ def process_contour( Args: contour (numpy.ndarray): Contour to process, defined as a set of points. contour_holes (list): List of holes within the contour. - spacing (float): Target spacing for the tiles. - tolerance (float): Tolerance for matching the spacing, deciding how much - spacing can deviate from those specified in the slide metadata. - tile_size (int): Size of the tiles in pixels. + target_spacing (float): Target spacing for the tiles. 
+ tolerance (float): Tolerance for matching the target_spacing, deciding how much + target_spacing can deviate from those specified in the slide metadata. + target_tile_size (int): Size of the tiles in pixels. overlap (float): Overlap between tiles. drop_holes (bool): Whether to drop tiles that fall within holes. use_padding (bool): Whether to pad the image to ensure full coverage. @@ -892,10 +891,10 @@ def process_contour( - resize_factor (float): The factor by which the tile size was resized. """ tile_level, is_within_tolerance = self.get_best_level_for_spacing( - spacing, tolerance + target_spacing, tolerance ) tile_spacing = self.get_level_spacing(tile_level) - resize_factor = spacing / tile_spacing + resize_factor = target_spacing / tile_spacing if is_within_tolerance: resize_factor = 1.0 @@ -903,7 +902,7 @@ def process_contour( resize_factor >= 1 ), f"Resize factor should be greater than or equal to 1. Got {resize_factor}" - tile_size_resized = int(round(tile_size * resize_factor,0)) + tile_size_resized = int(round(target_tile_size * resize_factor,0)) step_size = int(tile_size_resized * (1.0 - overlap)) if contour is not None: @@ -943,7 +942,7 @@ def process_contour( contour=cont, contour_holes=contour_holes, tissue_mask=mask, - tile_size=tile_size, + target_tile_size=target_tile_size, tile_spacing=tile_spacing, resize_factor=resize_factor, seg_spacing=seg_spacing, @@ -987,34 +986,4 @@ def process_contour( ) else: - return [], [], [], None, None - - @staticmethod - def process_coord_candidate( - coord, contour_holes, tile_size, cont_check_fn, drop_holes - ): - """ - Processes a candidate coordinate to determine if it should be kept based on - its location relative to contours and the percentage of tissue it contains. - - Args: - coord (tuple): (x, y) coordinate to be processed. - contour_holes (list): A list of contours and holes to check against. - tile_size (int): Size of the tile to consider. 
- cont_check_fn (callable): A function to check if the coordinate is within - the contours or holes. - drop_holes (bool): A flag indicating whether to drop tiles falling in holes during the check. - - Returns: - tuple: A tuple containing: - - coord (tuple or None): Input coordinate if it passes the check, - otherwise None. - - tissue_pct (float): Percentage of tissue in the tile. - """ - keep_flag, tissue_pct = WholeSlideImage.isInContours( - cont_check_fn, coord, contour_holes, drop_holes, tile_size - ) - if keep_flag: - return coord, tissue_pct - else: - return None, tissue_pct + return [], [], [], None, None diff --git a/test/gt/test-wsi.npy b/test/gt/test-wsi.npy index c68ca54..d3eadd4 100644 Binary files a/test/gt/test-wsi.npy and b/test/gt/test-wsi.npy differ