diff --git a/.gitignore b/.gitignore
old mode 100644
new mode 100755
diff --git a/README.md b/README.md
index 9e2af39..0648e5d 100644
--- a/README.md
+++ b/README.md
@@ -1,29 +1,5 @@
-
-
-# Python WSI Preprocessing
-
-This project contains a variety of files for investigating image preprocessing using Python
-with the aim of using deep learning to perform histopathology image classification of
-whole slide images.
-
-See main tutorial [here](./docs/wsi-preprocessing-in-python/index.md).
-
-See main project at [https://github.com/CODAIT/deep-histopath](https://github.com/CODAIT/deep-histopath)
-for more information.
+Go to https://github.com/FAU-DLM/wsi_processing_pipeline for changes in this library.
+All advances and new features will be committed there.
diff --git a/deephistopath/wsi/slide.py b/deephistopath/wsi/slide.py
deleted file mode 100644
index 3bde6c2..0000000
--- a/deephistopath/wsi/slide.py
+++ /dev/null
@@ -1,1027 +0,0 @@
-# ------------------------------------------------------------------------
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ------------------------------------------------------------------------
-
-import glob
-import math
-import matplotlib.pyplot as plt
-import multiprocessing
-import numpy as np
-import openslide
-from openslide import OpenSlideError
-import os
-import PIL
-from PIL import Image
-import re
-import sys
-from deephistopath.wsi import util
-from deephistopath.wsi.util import Time
-
-BASE_DIR = os.path.join(".", "data")
-# BASE_DIR = os.path.join(os.sep, "Volumes", "BigData", "TUPAC")
-TRAIN_PREFIX = "TUPAC-TR-"
-SRC_TRAIN_DIR = os.path.join(BASE_DIR, "training_slides")
-SRC_TRAIN_EXT = "svs"
-DEST_TRAIN_SUFFIX = "" # Example: "train-"
-DEST_TRAIN_EXT = "png"
-SCALE_FACTOR = 32
-DEST_TRAIN_DIR = os.path.join(BASE_DIR, "training_" + DEST_TRAIN_EXT)
-THUMBNAIL_SIZE = 300
-THUMBNAIL_EXT = "jpg"
-
-DEST_TRAIN_THUMBNAIL_DIR = os.path.join(BASE_DIR, "training_thumbnail_" + THUMBNAIL_EXT)
-
-FILTER_SUFFIX = "" # Example: "filter-"
-FILTER_RESULT_TEXT = "filtered"
-FILTER_DIR = os.path.join(BASE_DIR, "filter_" + DEST_TRAIN_EXT)
-FILTER_THUMBNAIL_DIR = os.path.join(BASE_DIR, "filter_thumbnail_" + THUMBNAIL_EXT)
-FILTER_PAGINATION_SIZE = 50
-FILTER_PAGINATE = True
-FILTER_HTML_DIR = BASE_DIR
-
-TILE_SUMMARY_DIR = os.path.join(BASE_DIR, "tile_summary_" + DEST_TRAIN_EXT)
-TILE_SUMMARY_ON_ORIGINAL_DIR = os.path.join(BASE_DIR, "tile_summary_on_original_" + DEST_TRAIN_EXT)
-TILE_SUMMARY_SUFFIX = "tile_summary"
-TILE_SUMMARY_THUMBNAIL_DIR = os.path.join(BASE_DIR, "tile_summary_thumbnail_" + THUMBNAIL_EXT)
-TILE_SUMMARY_ON_ORIGINAL_THUMBNAIL_DIR = os.path.join(BASE_DIR, "tile_summary_on_original_thumbnail_" + THUMBNAIL_EXT)
-TILE_SUMMARY_PAGINATION_SIZE = 50
-TILE_SUMMARY_PAGINATE = True
-TILE_SUMMARY_HTML_DIR = BASE_DIR
-
-TILE_DATA_DIR = os.path.join(BASE_DIR, "tile_data")
-TILE_DATA_SUFFIX = "tile_data"
-
-TOP_TILES_SUFFIX = "top_tile_summary"
-TOP_TILES_DIR = os.path.join(BASE_DIR, TOP_TILES_SUFFIX + "_" + DEST_TRAIN_EXT)
-TOP_TILES_THUMBNAIL_DIR = os.path.join(BASE_DIR, TOP_TILES_SUFFIX + "_thumbnail_" + THUMBNAIL_EXT)
-TOP_TILES_ON_ORIGINAL_DIR = os.path.join(BASE_DIR, TOP_TILES_SUFFIX + "_on_original_" + DEST_TRAIN_EXT)
-TOP_TILES_ON_ORIGINAL_THUMBNAIL_DIR = os.path.join(BASE_DIR,
- TOP_TILES_SUFFIX + "_on_original_thumbnail_" + THUMBNAIL_EXT)
-
-TILE_DIR = os.path.join(BASE_DIR, "tiles_" + DEST_TRAIN_EXT)
-TILE_SUFFIX = "tile"
-
-STATS_DIR = os.path.join(BASE_DIR, "svs_stats")
-
-
-def open_slide(filename):
- """
- Open a whole-slide image (*.svs, etc).
-
- Args:
- filename: Name of the slide file.
-
- Returns:
- An OpenSlide object representing a whole-slide image.
- """
- try:
- slide = openslide.open_slide(filename)
- except OpenSlideError:
- slide = None
- except FileNotFoundError:
- slide = None
- return slide
-
-
-def open_image(filename):
- """
- Open an image (*.jpg, *.png, etc).
-
- Args:
- filename: Name of the image file.
-
- returns:
- A PIL.Image.Image object representing an image.
- """
- image = Image.open(filename)
- return image
-
-
-def open_image_np(filename):
- """
- Open an image (*.jpg, *.png, etc) as an RGB NumPy array.
-
- Args:
- filename: Name of the image file.
-
- returns:
- A NumPy representing an RGB image.
- """
- pil_img = open_image(filename)
- np_img = util.pil_to_np_rgb(pil_img)
- return np_img
-
-
-def get_training_slide_path(slide_number):
- """
- Convert slide number to a path to the corresponding WSI training slide file.
-
- Example:
- 5 -> ../data/training_slides/TUPAC-TR-005.svs
-
- Args:
- slide_number: The slide number.
-
- Returns:
- Path to the WSI training slide file.
- """
- padded_sl_num = str(slide_number).zfill(3)
- slide_filepath = os.path.join(SRC_TRAIN_DIR, TRAIN_PREFIX + padded_sl_num + "." + SRC_TRAIN_EXT)
- return slide_filepath
-
-
-def get_tile_image_path(tile):
- """
- Obtain tile image path based on tile information such as row, column, row pixel position, column pixel position,
- pixel width, and pixel height.
-
- Args:
- tile: Tile object.
-
- Returns:
- Path to image tile.
- """
- t = tile
- padded_sl_num = str(t.slide_num).zfill(3)
- tile_path = os.path.join(TILE_DIR, padded_sl_num,
- TRAIN_PREFIX + padded_sl_num + "-" + TILE_SUFFIX + "-r%d-c%d-x%d-y%d-w%d-h%d" % (
- t.r, t.c, t.o_c_s, t.o_r_s, t.o_c_e - t.o_c_s, t.o_r_e - t.o_r_s) + "." + DEST_TRAIN_EXT)
- return tile_path
-
-
-def get_tile_image_path_by_slide_row_col(slide_number, row, col):
- """
- Obtain tile image path using wildcard lookup with slide number, row, and column.
-
- Args:
- slide_number: The slide number.
- row: The row.
- col: The column.
-
- Returns:
- Path to image tile.
- """
- padded_sl_num = str(slide_number).zfill(3)
- wilcard_path = os.path.join(TILE_DIR, padded_sl_num,
- TRAIN_PREFIX + padded_sl_num + "-" + TILE_SUFFIX + "-r%d-c%d-*." % (
- row, col) + DEST_TRAIN_EXT)
- img_path = glob.glob(wilcard_path)[0]
- return img_path
-
-
-def get_training_image_path(slide_number, large_w=None, large_h=None, small_w=None, small_h=None):
- """
- Convert slide number and optional dimensions to a training image path. If no dimensions are supplied,
- the corresponding file based on the slide number will be looked up in the file system using a wildcard.
-
- Example:
- 5 -> ../data/training_png/TUPAC-TR-005-32x-49920x108288-1560x3384.png
-
- Args:
- slide_number: The slide number.
- large_w: Large image width.
- large_h: Large image height.
- small_w: Small image width.
- small_h: Small image height.
-
- Returns:
- Path to the image file.
- """
- padded_sl_num = str(slide_number).zfill(3)
- if large_w is None and large_h is None and small_w is None and small_h is None:
- wildcard_path = os.path.join(DEST_TRAIN_DIR, TRAIN_PREFIX + padded_sl_num + "*." + DEST_TRAIN_EXT)
- img_path = glob.glob(wildcard_path)[0]
- else:
- img_path = os.path.join(DEST_TRAIN_DIR, TRAIN_PREFIX + padded_sl_num + "-" + str(
- SCALE_FACTOR) + "x-" + DEST_TRAIN_SUFFIX + str(
- large_w) + "x" + str(large_h) + "-" + str(small_w) + "x" + str(small_h) + "." + DEST_TRAIN_EXT)
- return img_path
-
-
-def get_training_thumbnail_path(slide_number, large_w=None, large_h=None, small_w=None, small_h=None):
- """
- Convert slide number and optional dimensions to a training thumbnail path. If no dimensions are
- supplied, the corresponding file based on the slide number will be looked up in the file system using a wildcard.
-
- Example:
- 5 -> ../data/training_thumbnail_jpg/TUPAC-TR-005-32x-49920x108288-1560x3384.jpg
-
- Args:
- slide_number: The slide number.
- large_w: Large image width.
- large_h: Large image height.
- small_w: Small image width.
- small_h: Small image height.
-
- Returns:
- Path to the thumbnail file.
- """
- padded_sl_num = str(slide_number).zfill(3)
- if large_w is None and large_h is None and small_w is None and small_h is None:
- wilcard_path = os.path.join(DEST_TRAIN_THUMBNAIL_DIR, TRAIN_PREFIX + padded_sl_num + "*." + THUMBNAIL_EXT)
- img_path = glob.glob(wilcard_path)[0]
- else:
- img_path = os.path.join(DEST_TRAIN_THUMBNAIL_DIR, TRAIN_PREFIX + padded_sl_num + "-" + str(
- SCALE_FACTOR) + "x-" + DEST_TRAIN_SUFFIX + str(
- large_w) + "x" + str(large_h) + "-" + str(small_w) + "x" + str(small_h) + "." + THUMBNAIL_EXT)
- return img_path
-
-
-def get_filter_image_path(slide_number, filter_number, filter_name_info):
- """
- Convert slide number, filter number, and text to a path to a filter image file.
-
- Example:
- 5, 1, "rgb" -> ../data/filter_png/TUPAC-TR-005-001-rgb.png
-
- Args:
- slide_number: The slide number.
- filter_number: The filter number.
- filter_name_info: Descriptive text describing filter.
-
- Returns:
- Path to the filter image file.
- """
- dir = FILTER_DIR
- if not os.path.exists(dir):
- os.makedirs(dir)
- img_path = os.path.join(dir, get_filter_image_filename(slide_number, filter_number, filter_name_info))
- return img_path
-
-
-def get_filter_thumbnail_path(slide_number, filter_number, filter_name_info):
- """
- Convert slide number, filter number, and text to a path to a filter thumbnail file.
-
- Example:
- 5, 1, "rgb" -> ../data/filter_thumbnail_jpg/TUPAC-TR-005-001-rgb.jpg
-
- Args:
- slide_number: The slide number.
- filter_number: The filter number.
- filter_name_info: Descriptive text describing filter.
-
- Returns:
- Path to the filter thumbnail file.
- """
- dir = FILTER_THUMBNAIL_DIR
- if not os.path.exists(dir):
- os.makedirs(dir)
- img_path = os.path.join(dir, get_filter_image_filename(slide_number, filter_number, filter_name_info, thumbnail=True))
- return img_path
-
-
-def get_filter_image_filename(slide_number, filter_number, filter_name_info, thumbnail=False):
- """
- Convert slide number, filter number, and text to a filter file name.
-
- Example:
- 5, 1, "rgb", False -> TUPAC-TR-005-001-rgb.png
- 5, 1, "rgb", True -> TUPAC-TR-005-001-rgb.jpg
-
- Args:
- slide_number: The slide number.
- filter_number: The filter number.
- filter_name_info: Descriptive text describing filter.
- thumbnail: If True, produce thumbnail filename.
-
- Returns:
- The filter image or thumbnail file name.
- """
- if thumbnail:
- ext = THUMBNAIL_EXT
- else:
- ext = DEST_TRAIN_EXT
- padded_sl_num = str(slide_number).zfill(3)
- padded_fi_num = str(filter_number).zfill(3)
- img_filename = TRAIN_PREFIX + padded_sl_num + "-" + padded_fi_num + "-" + FILTER_SUFFIX + filter_name_info + "." + ext
- return img_filename
-
-
-def get_tile_summary_image_path(slide_number):
- """
- Convert slide number to a path to a tile summary image file.
-
- Example:
- 5 -> ../data/tile_summary_png/TUPAC-TR-005-tile_summary.png
-
- Args:
- slide_number: The slide number.
-
- Returns:
- Path to the tile summary image file.
- """
- if not os.path.exists(TILE_SUMMARY_DIR):
- os.makedirs(TILE_SUMMARY_DIR)
- img_path = os.path.join(TILE_SUMMARY_DIR, get_tile_summary_image_filename(slide_number))
- return img_path
-
-
-def get_tile_summary_thumbnail_path(slide_number):
- """
- Convert slide number to a path to a tile summary thumbnail file.
-
- Example:
- 5 -> ../data/tile_summary_thumbnail_jpg/TUPAC-TR-005-tile_summary.jpg
-
- Args:
- slide_number: The slide number.
-
- Returns:
- Path to the tile summary thumbnail file.
- """
- if not os.path.exists(TILE_SUMMARY_THUMBNAIL_DIR):
- os.makedirs(TILE_SUMMARY_THUMBNAIL_DIR)
- img_path = os.path.join(TILE_SUMMARY_THUMBNAIL_DIR, get_tile_summary_image_filename(slide_number, thumbnail=True))
- return img_path
-
-
-def get_tile_summary_on_original_image_path(slide_number):
- """
- Convert slide number to a path to a tile summary on original image file.
-
- Example:
- 5 -> ../data/tile_summary_on_original_png/TUPAC-TR-005-tile_summary.png
-
- Args:
- slide_number: The slide number.
-
- Returns:
- Path to the tile summary on original image file.
- """
- if not os.path.exists(TILE_SUMMARY_ON_ORIGINAL_DIR):
- os.makedirs(TILE_SUMMARY_ON_ORIGINAL_DIR)
- img_path = os.path.join(TILE_SUMMARY_ON_ORIGINAL_DIR, get_tile_summary_image_filename(slide_number))
- return img_path
-
-
-def get_tile_summary_on_original_thumbnail_path(slide_number):
- """
- Convert slide number to a path to a tile summary on original thumbnail file.
-
- Example:
- 5 -> ../data/tile_summary_on_original_thumbnail_jpg/TUPAC-TR-005-tile_summary.jpg
-
- Args:
- slide_number: The slide number.
-
- Returns:
- Path to the tile summary on original thumbnail file.
- """
- if not os.path.exists(TILE_SUMMARY_ON_ORIGINAL_THUMBNAIL_DIR):
- os.makedirs(TILE_SUMMARY_ON_ORIGINAL_THUMBNAIL_DIR)
- img_path = os.path.join(TILE_SUMMARY_ON_ORIGINAL_THUMBNAIL_DIR,
- get_tile_summary_image_filename(slide_number, thumbnail=True))
- return img_path
-
-
-def get_top_tiles_on_original_image_path(slide_number):
- """
- Convert slide number to a path to a top tiles on original image file.
-
- Example:
- 5 -> ../data/top_tiles_on_original_png/TUPAC-TR-005-32x-49920x108288-1560x3384-top_tiles.png
-
- Args:
- slide_number: The slide number.
-
- Returns:
- Path to the top tiles on original image file.
- """
- if not os.path.exists(TOP_TILES_ON_ORIGINAL_DIR):
- os.makedirs(TOP_TILES_ON_ORIGINAL_DIR)
- img_path = os.path.join(TOP_TILES_ON_ORIGINAL_DIR, get_top_tiles_image_filename(slide_number))
- return img_path
-
-
-def get_top_tiles_on_original_thumbnail_path(slide_number):
- """
- Convert slide number to a path to a top tiles on original thumbnail file.
-
- Example:
- 5 -> ../data/top_tiles_on_original_thumbnail_jpg/TUPAC-TR-005-32x-49920x108288-1560x3384-top_tiles.jpg
-
- Args:
- slide_number: The slide number.
-
- Returns:
- Path to the top tiles on original thumbnail file.
- """
- if not os.path.exists(TOP_TILES_ON_ORIGINAL_THUMBNAIL_DIR):
- os.makedirs(TOP_TILES_ON_ORIGINAL_THUMBNAIL_DIR)
- img_path = os.path.join(TOP_TILES_ON_ORIGINAL_THUMBNAIL_DIR,
- get_top_tiles_image_filename(slide_number, thumbnail=True))
- return img_path
-
-
-def get_tile_summary_image_filename(slide_number, thumbnail=False):
- """
- Convert slide number to a tile summary image file name.
-
- Example:
- 5, False -> TUPAC-TR-005-tile_summary.png
- 5, True -> TUPAC-TR-005-tile_summary.jpg
-
- Args:
- slide_number: The slide number.
- thumbnail: If True, produce thumbnail filename.
-
- Returns:
- The tile summary image file name.
- """
- if thumbnail:
- ext = THUMBNAIL_EXT
- else:
- ext = DEST_TRAIN_EXT
- padded_sl_num = str(slide_number).zfill(3)
-
- training_img_path = get_training_image_path(slide_number)
- large_w, large_h, small_w, small_h = parse_dimensions_from_image_filename(training_img_path)
- img_filename = TRAIN_PREFIX + padded_sl_num + "-" + str(SCALE_FACTOR) + "x-" + str(large_w) + "x" + str(
- large_h) + "-" + str(small_w) + "x" + str(small_h) + "-" + TILE_SUMMARY_SUFFIX + "." + ext
-
- return img_filename
-
-
-def get_top_tiles_image_filename(slide_number, thumbnail=False):
- """
- Convert slide number to a top tiles image file name.
-
- Example:
- 5, False -> TUPAC-TR-005-32x-49920x108288-1560x3384-top_tiles.png
- 5, True -> TUPAC-TR-005-32x-49920x108288-1560x3384-top_tiles.jpg
-
- Args:
- slide_number: The slide number.
- thumbnail: If True, produce thumbnail filename.
-
- Returns:
- The top tiles image file name.
- """
- if thumbnail:
- ext = THUMBNAIL_EXT
- else:
- ext = DEST_TRAIN_EXT
- padded_sl_num = str(slide_number).zfill(3)
-
- training_img_path = get_training_image_path(slide_number)
- large_w, large_h, small_w, small_h = parse_dimensions_from_image_filename(training_img_path)
- img_filename = TRAIN_PREFIX + padded_sl_num + "-" + str(SCALE_FACTOR) + "x-" + str(large_w) + "x" + str(
- large_h) + "-" + str(small_w) + "x" + str(small_h) + "-" + TOP_TILES_SUFFIX + "." + ext
-
- return img_filename
-
-
-def get_top_tiles_image_path(slide_number):
- """
- Convert slide number to a path to a top tiles image file.
-
- Example:
- 5 -> ../data/top_tiles_png/TUPAC-TR-005-32x-49920x108288-1560x3384-top_tiles.png
-
- Args:
- slide_number: The slide number.
-
- Returns:
- Path to the top tiles image file.
- """
- if not os.path.exists(TOP_TILES_DIR):
- os.makedirs(TOP_TILES_DIR)
- img_path = os.path.join(TOP_TILES_DIR, get_top_tiles_image_filename(slide_number))
- return img_path
-
-
-def get_top_tiles_thumbnail_path(slide_number):
- """
- Convert slide number to a path to a tile summary thumbnail file.
-
- Example:
- 5 -> ../data/top_tiles_thumbnail_jpg/TUPAC-TR-005-32x-49920x108288-1560x3384-top_tiles.jpg
- Args:
- slide_number: The slide number.
-
- Returns:
- Path to the top tiles thumbnail file.
- """
- if not os.path.exists(TOP_TILES_THUMBNAIL_DIR):
- os.makedirs(TOP_TILES_THUMBNAIL_DIR)
- img_path = os.path.join(TOP_TILES_THUMBNAIL_DIR, get_top_tiles_image_filename(slide_number, thumbnail=True))
- return img_path
-
-
-def get_tile_data_filename(slide_number):
- """
- Convert slide number to a tile data file name.
-
- Example:
- 5 -> TUPAC-TR-005-32x-49920x108288-1560x3384-tile_data.csv
-
- Args:
- slide_number: The slide number.
-
- Returns:
- The tile data file name.
- """
- padded_sl_num = str(slide_number).zfill(3)
-
- training_img_path = get_training_image_path(slide_number)
- large_w, large_h, small_w, small_h = parse_dimensions_from_image_filename(training_img_path)
- data_filename = TRAIN_PREFIX + padded_sl_num + "-" + str(SCALE_FACTOR) + "x-" + str(large_w) + "x" + str(
- large_h) + "-" + str(small_w) + "x" + str(small_h) + "-" + TILE_DATA_SUFFIX + ".csv"
-
- return data_filename
-
-
-def get_tile_data_path(slide_number):
- """
- Convert slide number to a path to a tile data file.
-
- Example:
- 5 -> ../data/tile_data/TUPAC-TR-005-32x-49920x108288-1560x3384-tile_data.csv
-
- Args:
- slide_number: The slide number.
-
- Returns:
- Path to the tile data file.
- """
- if not os.path.exists(TILE_DATA_DIR):
- os.makedirs(TILE_DATA_DIR)
- file_path = os.path.join(TILE_DATA_DIR, get_tile_data_filename(slide_number))
- return file_path
-
-
-def get_filter_image_result(slide_number):
- """
- Convert slide number to the path to the file that is the final result of filtering.
-
- Example:
- 5 -> ../data/filter_png/TUPAC-TR-005-32x-49920x108288-1560x3384-filtered.png
-
- Args:
- slide_number: The slide number.
-
- Returns:
- Path to the filter image file.
- """
- padded_sl_num = str(slide_number).zfill(3)
- training_img_path = get_training_image_path(slide_number)
- large_w, large_h, small_w, small_h = parse_dimensions_from_image_filename(training_img_path)
- img_path = os.path.join(FILTER_DIR, TRAIN_PREFIX + padded_sl_num + "-" + str(
- SCALE_FACTOR) + "x-" + FILTER_SUFFIX + str(large_w) + "x" + str(large_h) + "-" + str(small_w) + "x" + str(
- small_h) + "-" + FILTER_RESULT_TEXT + "." + DEST_TRAIN_EXT)
- return img_path
-
-
-def get_filter_thumbnail_result(slide_number):
- """
- Convert slide number to the path to the file that is the final thumbnail result of filtering.
-
- Example:
- 5 -> ../data/filter_thumbnail_jpg/TUPAC-TR-005-32x-49920x108288-1560x3384-filtered.jpg
-
- Args:
- slide_number: The slide number.
-
- Returns:
- Path to the filter thumbnail file.
- """
- padded_sl_num = str(slide_number).zfill(3)
- training_img_path = get_training_image_path(slide_number)
- large_w, large_h, small_w, small_h = parse_dimensions_from_image_filename(training_img_path)
- img_path = os.path.join(FILTER_THUMBNAIL_DIR, TRAIN_PREFIX + padded_sl_num + "-" + str(
- SCALE_FACTOR) + "x-" + FILTER_SUFFIX + str(large_w) + "x" + str(large_h) + "-" + str(small_w) + "x" + str(
- small_h) + "-" + FILTER_RESULT_TEXT + "." + THUMBNAIL_EXT)
- return img_path
-
-
-def parse_dimensions_from_image_filename(filename):
- """
- Parse an image filename to extract the original width and height and the converted width and height.
-
- Example:
- "TUPAC-TR-011-32x-97103x79079-3034x2471-tile_summary.png" -> (97103, 79079, 3034, 2471)
-
- Args:
- filename: The image filename.
-
- Returns:
- Tuple consisting of the original width, original height, the converted width, and the converted height.
- """
- m = re.match(".*-([\d]*)x([\d]*)-([\d]*)x([\d]*).*\..*", filename)
- large_w = int(m.group(1))
- large_h = int(m.group(2))
- small_w = int(m.group(3))
- small_h = int(m.group(4))
- return large_w, large_h, small_w, small_h
-
-
-def small_to_large_mapping(small_pixel, large_dimensions):
- """
- Map a scaled-down pixel width and height to the corresponding pixel of the original whole-slide image.
-
- Args:
- small_pixel: The scaled-down width and height.
- large_dimensions: The width and height of the original whole-slide image.
-
- Returns:
- Tuple consisting of the scaled-up width and height.
- """
- small_x, small_y = small_pixel
- large_w, large_h = large_dimensions
- large_x = round((large_w / SCALE_FACTOR) / math.floor(large_w / SCALE_FACTOR) * (SCALE_FACTOR * small_x))
- large_y = round((large_h / SCALE_FACTOR) / math.floor(large_h / SCALE_FACTOR) * (SCALE_FACTOR * small_y))
- return large_x, large_y
-
-
-def training_slide_to_image(slide_number):
- """
- Convert a WSI training slide to a saved scaled-down image in a format such as jpg or png.
-
- Args:
- slide_number: The slide number.
- """
-
- img, large_w, large_h, new_w, new_h = slide_to_scaled_pil_image(slide_number)
-
- img_path = get_training_image_path(slide_number, large_w, large_h, new_w, new_h)
- print("Saving image to: " + img_path)
- if not os.path.exists(DEST_TRAIN_DIR):
- os.makedirs(DEST_TRAIN_DIR)
- img.save(img_path)
-
- thumbnail_path = get_training_thumbnail_path(slide_number, large_w, large_h, new_w, new_h)
- save_thumbnail(img, THUMBNAIL_SIZE, thumbnail_path)
-
-
-def slide_to_scaled_pil_image(slide_number):
- """
- Convert a WSI training slide to a scaled-down PIL image.
-
- Args:
- slide_number: The slide number.
-
- Returns:
- Tuple consisting of scaled-down PIL image, original width, original height, new width, and new height.
- """
- slide_filepath = get_training_slide_path(slide_number)
- print("Opening Slide #%d: %s" % (slide_number, slide_filepath))
- slide = open_slide(slide_filepath)
-
- large_w, large_h = slide.dimensions
- new_w = math.floor(large_w / SCALE_FACTOR)
- new_h = math.floor(large_h / SCALE_FACTOR)
- level = slide.get_best_level_for_downsample(SCALE_FACTOR)
- whole_slide_image = slide.read_region((0, 0), level, slide.level_dimensions[level])
- whole_slide_image = whole_slide_image.convert("RGB")
- img = whole_slide_image.resize((new_w, new_h), PIL.Image.BILINEAR)
- return img, large_w, large_h, new_w, new_h
-
-
-def slide_to_scaled_np_image(slide_number):
- """
- Convert a WSI training slide to a scaled-down NumPy image.
-
- Args:
- slide_number: The slide number.
-
- Returns:
- Tuple consisting of scaled-down NumPy image, original width, original height, new width, and new height.
- """
- pil_img, large_w, large_h, new_w, new_h = slide_to_scaled_pil_image(slide_number)
- np_img = util.pil_to_np_rgb(pil_img)
- return np_img, large_w, large_h, new_w, new_h
-
-
-def show_slide(slide_number):
- """
- Display a WSI slide on the screen, where the slide has been scaled down and converted to a PIL image.
-
- Args:
- slide_number: The slide number.
- """
- pil_img = slide_to_scaled_pil_image(slide_number)[0]
- pil_img.show()
-
-
-def save_thumbnail(pil_img, size, path, display_path=False):
- """
- Save a thumbnail of a PIL image, specifying the maximum width or height of the thumbnail.
-
- Args:
- pil_img: The PIL image to save as a thumbnail.
- size: The maximum width or height of the thumbnail.
- path: The path to the thumbnail.
- display_path: If True, display thumbnail path in console.
- """
- max_size = tuple(round(size * d / max(pil_img.size)) for d in pil_img.size)
- img = pil_img.resize(max_size, PIL.Image.BILINEAR)
- if display_path:
- print("Saving thumbnail to: " + path)
- dir = os.path.dirname(path)
- if dir != '' and not os.path.exists(dir):
- os.makedirs(dir)
- img.save(path)
-
-
-def get_num_training_slides():
- """
- Obtain the total number of WSI training slide images.
-
- Returns:
- The total number of WSI training slide images.
- """
- num_training_slides = len(glob.glob1(SRC_TRAIN_DIR, "*." + SRC_TRAIN_EXT))
- return num_training_slides
-
-
-def training_slide_range_to_images(start_ind, end_ind):
- """
- Convert a range of WSI training slides to smaller images (in a format such as jpg or png).
-
- Args:
- start_ind: Starting index (inclusive).
- end_ind: Ending index (inclusive).
-
- Returns:
- The starting index and the ending index of the slides that were converted.
- """
- for slide_num in range(start_ind, end_ind + 1):
- training_slide_to_image(slide_num)
- return (start_ind, end_ind)
-
-
-def singleprocess_training_slides_to_images():
- """
- Convert all WSI training slides to smaller images using a single process.
- """
- t = Time()
-
- num_train_images = get_num_training_slides()
- training_slide_range_to_images(1, num_train_images)
-
- t.elapsed_display()
-
-
-def multiprocess_training_slides_to_images():
- """
- Convert all WSI training slides to smaller images using multiple processes (one process per core).
- Each process will process a range of slide numbers.
- """
- timer = Time()
-
- # how many processes to use
- num_processes = multiprocessing.cpu_count()
- pool = multiprocessing.Pool(num_processes)
-
- num_train_images = get_num_training_slides()
- if num_processes > num_train_images:
- num_processes = num_train_images
- images_per_process = num_train_images / num_processes
-
- print("Number of processes: " + str(num_processes))
- print("Number of training images: " + str(num_train_images))
-
- # each task specifies a range of slides
- tasks = []
- for num_process in range(1, num_processes + 1):
- start_index = (num_process - 1) * images_per_process + 1
- end_index = num_process * images_per_process
- start_index = int(start_index)
- end_index = int(end_index)
- tasks.append((start_index, end_index))
- if start_index == end_index:
- print("Task #" + str(num_process) + ": Process slide " + str(start_index))
- else:
- print("Task #" + str(num_process) + ": Process slides " + str(start_index) + " to " + str(end_index))
-
- # start tasks
- results = []
- for t in tasks:
- results.append(pool.apply_async(training_slide_range_to_images, t))
-
- for result in results:
- (start_ind, end_ind) = result.get()
- if start_ind == end_ind:
- print("Done converting slide %d" % start_ind)
- else:
- print("Done converting slides %d through %d" % (start_ind, end_ind))
-
- timer.elapsed_display()
-
-
-def slide_stats():
- """
- Display statistics/graphs about training slides.
- """
- t = Time()
-
- if not os.path.exists(STATS_DIR):
- os.makedirs(STATS_DIR)
-
- num_train_images = get_num_training_slides()
- slide_stats = []
- for slide_num in range(1, num_train_images + 1):
- slide_filepath = get_training_slide_path(slide_num)
- print("Opening Slide #%d: %s" % (slide_num, slide_filepath))
- slide = open_slide(slide_filepath)
- (width, height) = slide.dimensions
- print(" Dimensions: {:,d} x {:,d}".format(width, height))
- slide_stats.append((width, height))
-
- max_width = 0
- max_height = 0
- min_width = sys.maxsize
- min_height = sys.maxsize
- total_width = 0
- total_height = 0
- total_size = 0
- which_max_width = 0
- which_max_height = 0
- which_min_width = 0
- which_min_height = 0
- max_size = 0
- min_size = sys.maxsize
- which_max_size = 0
- which_min_size = 0
- for z in range(0, num_train_images):
- (width, height) = slide_stats[z]
- if width > max_width:
- max_width = width
- which_max_width = z + 1
- if width < min_width:
- min_width = width
- which_min_width = z + 1
- if height > max_height:
- max_height = height
- which_max_height = z + 1
- if height < min_height:
- min_height = height
- which_min_height = z + 1
- size = width * height
- if size > max_size:
- max_size = size
- which_max_size = z + 1
- if size < min_size:
- min_size = size
- which_min_size = z + 1
- total_width = total_width + width
- total_height = total_height + height
- total_size = total_size + size
-
- avg_width = total_width / num_train_images
- avg_height = total_height / num_train_images
- avg_size = total_size / num_train_images
-
- stats_string = ""
- stats_string += "%-11s {:14,d} pixels (slide #%d)".format(max_width) % ("Max width:", which_max_width)
- stats_string += "\n%-11s {:14,d} pixels (slide #%d)".format(max_height) % ("Max height:", which_max_height)
- stats_string += "\n%-11s {:14,d} pixels (slide #%d)".format(max_size) % ("Max size:", which_max_size)
- stats_string += "\n%-11s {:14,d} pixels (slide #%d)".format(min_width) % ("Min width:", which_min_width)
- stats_string += "\n%-11s {:14,d} pixels (slide #%d)".format(min_height) % ("Min height:", which_min_height)
- stats_string += "\n%-11s {:14,d} pixels (slide #%d)".format(min_size) % ("Min size:", which_min_size)
- stats_string += "\n%-11s {:14,d} pixels".format(round(avg_width)) % "Avg width:"
- stats_string += "\n%-11s {:14,d} pixels".format(round(avg_height)) % "Avg height:"
- stats_string += "\n%-11s {:14,d} pixels".format(round(avg_size)) % "Avg size:"
- stats_string += "\n"
- print(stats_string)
-
- stats_string += "\nslide number,width,height"
- for i in range(0, len(slide_stats)):
- (width, height) = slide_stats[i]
- stats_string += "\n%d,%d,%d" % (i + 1, width, height)
- stats_string += "\n"
-
- stats_file = open(os.path.join(STATS_DIR, "stats.txt"), "w")
- stats_file.write(stats_string)
- stats_file.close()
-
- t.elapsed_display()
-
- x, y = zip(*slide_stats)
- colors = np.random.rand(num_train_images)
- sizes = [10 for n in range(num_train_images)]
- plt.scatter(x, y, s=sizes, c=colors, alpha=0.7)
- plt.xlabel("width (pixels)")
- plt.ylabel("height (pixels)")
- plt.title("SVS Image Sizes")
- plt.set_cmap("prism")
- plt.tight_layout()
- plt.savefig(os.path.join(STATS_DIR, "svs-image-sizes.png"))
- plt.show()
-
- plt.clf()
- plt.scatter(x, y, s=sizes, c=colors, alpha=0.7)
- plt.xlabel("width (pixels)")
- plt.ylabel("height (pixels)")
- plt.title("SVS Image Sizes (Labeled with slide numbers)")
- plt.set_cmap("prism")
- for i in range(num_train_images):
- snum = i + 1
- plt.annotate(str(snum), (x[i], y[i]))
- plt.tight_layout()
- plt.savefig(os.path.join(STATS_DIR, "svs-image-sizes-slide-numbers.png"))
- plt.show()
-
- plt.clf()
- area = [w * h / 1000000 for (w, h) in slide_stats]
- plt.hist(area, bins=64)
- plt.xlabel("width x height (M of pixels)")
- plt.ylabel("# images")
- plt.title("Distribution of image sizes in millions of pixels")
- plt.tight_layout()
- plt.savefig(os.path.join(STATS_DIR, "distribution-of-svs-image-sizes.png"))
- plt.show()
-
- plt.clf()
- whratio = [w / h for (w, h) in slide_stats]
- plt.hist(whratio, bins=64)
- plt.xlabel("width to height ratio")
- plt.ylabel("# images")
- plt.title("Image shapes (width to height)")
- plt.tight_layout()
- plt.savefig(os.path.join(STATS_DIR, "w-to-h.png"))
- plt.show()
-
- plt.clf()
- hwratio = [h / w for (w, h) in slide_stats]
- plt.hist(hwratio, bins=64)
- plt.xlabel("height to width ratio")
- plt.ylabel("# images")
- plt.title("Image shapes (height to width)")
- plt.tight_layout()
- plt.savefig(os.path.join(STATS_DIR, "h-to-w.png"))
- plt.show()
-
-
-def slide_info(display_all_properties=False):
- """
- Display information (such as properties) about training images.
-
- Args:
- display_all_properties: If True, display all available slide properties.
- """
- t = Time()
-
- num_train_images = get_num_training_slides()
- obj_pow_20_list = []
- obj_pow_40_list = []
- obj_pow_other_list = []
- for slide_num in range(1, num_train_images + 1):
- slide_filepath = get_training_slide_path(slide_num)
- print("\nOpening Slide #%d: %s" % (slide_num, slide_filepath))
- slide = open_slide(slide_filepath)
- print("Level count: %d" % slide.level_count)
- print("Level dimensions: " + str(slide.level_dimensions))
- print("Level downsamples: " + str(slide.level_downsamples))
- print("Dimensions: " + str(slide.dimensions))
- objective_power = int(slide.properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER])
- print("Objective power: " + str(objective_power))
- if objective_power == 20:
- obj_pow_20_list.append(slide_num)
- elif objective_power == 40:
- obj_pow_40_list.append(slide_num)
- else:
- obj_pow_other_list.append(slide_num)
- print("Associated images:")
- for ai_key in slide.associated_images.keys():
- print(" " + str(ai_key) + ": " + str(slide.associated_images.get(ai_key)))
- print("Format: " + str(slide.detect_format(slide_filepath)))
- if display_all_properties:
- print("Properties:")
- for prop_key in slide.properties.keys():
- print(" Property: " + str(prop_key) + ", value: " + str(slide.properties.get(prop_key)))
-
- print("\n\nSlide Magnifications:")
- print(" 20x Slides: " + str(obj_pow_20_list))
- print(" 40x Slides: " + str(obj_pow_40_list))
- print(" ??x Slides: " + str(obj_pow_other_list) + "\n")
-
- t.elapsed_display()
-
-
-# if __name__ == "__main__":
- # show_slide(2)
- # slide_info(display_all_properties=True)
- # slide_stats()
-
- # training_slide_to_image(4)
- # img_path = get_training_image_path(4)
- # img = open_image(img_path)
- # img.show()
-
- # slide_to_scaled_pil_image(5)[0].show()
- # singleprocess_training_slides_to_images()
- # multiprocess_training_slides_to_images()
diff --git a/deephistopath/wsi/tiles.py b/deephistopath/wsi/tiles.py
deleted file mode 100644
index 887aedd..0000000
--- a/deephistopath/wsi/tiles.py
+++ /dev/null
@@ -1,1959 +0,0 @@
-# ------------------------------------------------------------------------
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ------------------------------------------------------------------------
-
-# To get around renderer issue on macOS going from Matplotlib image to NumPy image.
-import matplotlib
-
-matplotlib.use('Agg')
-
-import colorsys
-import math
-import matplotlib.pyplot as plt
-import multiprocessing
-import numpy as np
-import os
-from PIL import Image, ImageDraw, ImageFont
-from enum import Enum
-from deephistopath.wsi import util
-from deephistopath.wsi import filter
-from deephistopath.wsi import slide
-from deephistopath.wsi.util import Time
-
-TISSUE_HIGH_THRESH = 80
-TISSUE_LOW_THRESH = 10
-
-ROW_TILE_SIZE = 1024
-COL_TILE_SIZE = 1024
-NUM_TOP_TILES = 50
-
-DISPLAY_TILE_SUMMARY_LABELS = False
-TILE_LABEL_TEXT_SIZE = 10
-LABEL_ALL_TILES_IN_TOP_TILE_SUMMARY = False
-BORDER_ALL_TILES_IN_TOP_TILE_SUMMARY = False
-
-TILE_BORDER_SIZE = 2 # The size of the colored rectangular border around summary tiles.
-
-HIGH_COLOR = (0, 255, 0)
-MEDIUM_COLOR = (255, 255, 0)
-LOW_COLOR = (255, 165, 0)
-NONE_COLOR = (255, 0, 0)
-
-FADED_THRESH_COLOR = (128, 255, 128)
-FADED_MEDIUM_COLOR = (255, 255, 128)
-FADED_LOW_COLOR = (255, 210, 128)
-FADED_NONE_COLOR = (255, 128, 128)
-
-FONT_PATH = "/Library/Fonts/Arial Bold.ttf"
-SUMMARY_TITLE_FONT_PATH = "/Library/Fonts/Courier New Bold.ttf"
-SUMMARY_TITLE_TEXT_COLOR = (0, 0, 0)
-SUMMARY_TITLE_TEXT_SIZE = 24
-SUMMARY_TILE_TEXT_COLOR = (255, 255, 255)
-TILE_TEXT_COLOR = (0, 0, 0)
-TILE_TEXT_SIZE = 36
-TILE_TEXT_BACKGROUND_COLOR = (255, 255, 255)
-TILE_TEXT_W_BORDER = 5
-TILE_TEXT_H_BORDER = 4
-
-HSV_PURPLE = 270
-HSV_PINK = 330
-
-
-def get_num_tiles(rows, cols, row_tile_size, col_tile_size):
- """
- Obtain the number of vertical and horizontal tiles that an image can be divided into given a row tile size and
- a column tile size.
-
- Args:
- rows: Number of rows.
- cols: Number of columns.
- row_tile_size: Number of pixels in a tile row.
- col_tile_size: Number of pixels in a tile column.
-
- Returns:
- Tuple consisting of the number of vertical tiles and the number of horizontal tiles that the image can be divided
- into given the row tile size and the column tile size.
- """
- num_row_tiles = math.ceil(rows / row_tile_size)
- num_col_tiles = math.ceil(cols / col_tile_size)
- return num_row_tiles, num_col_tiles
-
-
-def get_tile_indices(rows, cols, row_tile_size, col_tile_size):
- """
- Obtain a list of tile coordinates (starting row, ending row, starting column, ending column, row number, column number).
-
- Args:
- rows: Number of rows.
- cols: Number of columns.
- row_tile_size: Number of pixels in a tile row.
- col_tile_size: Number of pixels in a tile column.
-
- Returns:
- List of tuples representing tile coordinates consisting of starting row, ending row,
- starting column, ending column, row number, column number.
- """
- indices = list()
- num_row_tiles, num_col_tiles = get_num_tiles(rows, cols, row_tile_size, col_tile_size)
- for r in range(0, num_row_tiles):
- start_r = r * row_tile_size
- end_r = ((r + 1) * row_tile_size) if (r < num_row_tiles - 1) else rows
- for c in range(0, num_col_tiles):
- start_c = c * col_tile_size
- end_c = ((c + 1) * col_tile_size) if (c < num_col_tiles - 1) else cols
- indices.append((start_r, end_r, start_c, end_c, r + 1, c + 1))
- return indices
-
-
-def create_summary_pil_img(np_img, title_area_height, row_tile_size, col_tile_size, num_row_tiles, num_col_tiles):
- """
- Create a PIL summary image including top title area and right side and bottom padding.
-
- Args:
- np_img: Image as a NumPy array.
- title_area_height: Height of the title area at the top of the summary image.
- row_tile_size: The tile size in rows.
- col_tile_size: The tile size in columns.
- num_row_tiles: The number of row tiles.
- num_col_tiles: The number of column tiles.
-
- Returns:
- Summary image as a PIL image. This image contains the image data specified by the np_img input and also has
- potentially a top title area and right side and bottom padding.
- """
- r = row_tile_size * num_row_tiles + title_area_height
- c = col_tile_size * num_col_tiles
- summary_img = np.zeros([r, c, np_img.shape[2]], dtype=np.uint8)
- # add gray edges so that tile text does not get cut off
- summary_img.fill(120)
- # color title area white
- summary_img[0:title_area_height, 0:summary_img.shape[1]].fill(255)
- summary_img[title_area_height:np_img.shape[0] + title_area_height, 0:np_img.shape[1]] = np_img
- summary = util.np_to_pil(summary_img)
- return summary
-
-
-def generate_tile_summaries(tile_sum, np_img, display=True, save_summary=False):
- """
- Generate summary images/thumbnails showing a 'heatmap' representation of the tissue segmentation of all tiles.
-
- Args:
- tile_sum: TileSummary object.
- np_img: Image as a NumPy array.
- display: If True, display tile summary to screen.
- save_summary: If True, save tile summary images.
- """
- z = 300 # height of area at top of summary slide
- slide_num = tile_sum.slide_num
- rows = tile_sum.scaled_h
- cols = tile_sum.scaled_w
- row_tile_size = tile_sum.scaled_tile_h
- col_tile_size = tile_sum.scaled_tile_w
- num_row_tiles, num_col_tiles = get_num_tiles(rows, cols, row_tile_size, col_tile_size)
- summary = create_summary_pil_img(np_img, z, row_tile_size, col_tile_size, num_row_tiles, num_col_tiles)
- draw = ImageDraw.Draw(summary)
-
- original_img_path = slide.get_training_image_path(slide_num)
- np_orig = slide.open_image_np(original_img_path)
- summary_orig = create_summary_pil_img(np_orig, z, row_tile_size, col_tile_size, num_row_tiles, num_col_tiles)
- draw_orig = ImageDraw.Draw(summary_orig)
-
- for t in tile_sum.tiles:
- border_color = tile_border_color(t.tissue_percentage)
- tile_border(draw, t.r_s + z, t.r_e + z, t.c_s, t.c_e, border_color)
- tile_border(draw_orig, t.r_s + z, t.r_e + z, t.c_s, t.c_e, border_color)
-
- summary_txt = summary_title(tile_sum) + "\n" + summary_stats(tile_sum)
-
- summary_font = ImageFont.truetype(SUMMARY_TITLE_FONT_PATH, size=SUMMARY_TITLE_TEXT_SIZE)
- draw.text((5, 5), summary_txt, SUMMARY_TITLE_TEXT_COLOR, font=summary_font)
- draw_orig.text((5, 5), summary_txt, SUMMARY_TITLE_TEXT_COLOR, font=summary_font)
-
- if DISPLAY_TILE_SUMMARY_LABELS:
- count = 0
- for t in tile_sum.tiles:
- count += 1
- label = "R%d\nC%d" % (t.r, t.c)
- font = ImageFont.truetype(FONT_PATH, size=TILE_LABEL_TEXT_SIZE)
- # drop shadow behind text
- draw.text(((t.c_s + 3), (t.r_s + 3 + z)), label, (0, 0, 0), font=font)
- draw_orig.text(((t.c_s + 3), (t.r_s + 3 + z)), label, (0, 0, 0), font=font)
-
- draw.text(((t.c_s + 2), (t.r_s + 2 + z)), label, SUMMARY_TILE_TEXT_COLOR, font=font)
- draw_orig.text(((t.c_s + 2), (t.r_s + 2 + z)), label, SUMMARY_TILE_TEXT_COLOR, font=font)
-
- if display:
- summary.show()
- summary_orig.show()
- if save_summary:
- save_tile_summary_image(summary, slide_num)
- save_tile_summary_on_original_image(summary_orig, slide_num)
-
-
-def generate_top_tile_summaries(tile_sum, np_img, display=True, save_summary=False, show_top_stats=True,
- label_all_tiles=LABEL_ALL_TILES_IN_TOP_TILE_SUMMARY,
- border_all_tiles=BORDER_ALL_TILES_IN_TOP_TILE_SUMMARY):
- """
- Generate summary images/thumbnails showing the top tiles ranked by score.
-
- Args:
- tile_sum: TileSummary object.
- np_img: Image as a NumPy array.
- display: If True, display top tiles to screen.
- save_summary: If True, save top tiles images.
- show_top_stats: If True, append top tile score stats to image.
- label_all_tiles: If True, label all tiles. If False, label only top tiles.
- """
- z = 300 # height of area at top of summary slide
- slide_num = tile_sum.slide_num
- rows = tile_sum.scaled_h
- cols = tile_sum.scaled_w
- row_tile_size = tile_sum.scaled_tile_h
- col_tile_size = tile_sum.scaled_tile_w
- num_row_tiles, num_col_tiles = get_num_tiles(rows, cols, row_tile_size, col_tile_size)
- summary = create_summary_pil_img(np_img, z, row_tile_size, col_tile_size, num_row_tiles, num_col_tiles)
- draw = ImageDraw.Draw(summary)
-
- original_img_path = slide.get_training_image_path(slide_num)
- np_orig = slide.open_image_np(original_img_path)
- summary_orig = create_summary_pil_img(np_orig, z, row_tile_size, col_tile_size, num_row_tiles, num_col_tiles)
- draw_orig = ImageDraw.Draw(summary_orig)
-
- if border_all_tiles:
- for t in tile_sum.tiles:
- border_color = faded_tile_border_color(t.tissue_percentage)
- tile_border(draw, t.r_s + z, t.r_e + z, t.c_s, t.c_e, border_color, border_size=1)
- tile_border(draw_orig, t.r_s + z, t.r_e + z, t.c_s, t.c_e, border_color, border_size=1)
-
- tbs = TILE_BORDER_SIZE
- top_tiles = tile_sum.top_tiles()
- for t in top_tiles:
- border_color = tile_border_color(t.tissue_percentage)
- tile_border(draw, t.r_s + z, t.r_e + z, t.c_s, t.c_e, border_color)
- tile_border(draw_orig, t.r_s + z, t.r_e + z, t.c_s, t.c_e, border_color)
- if border_all_tiles:
- tile_border(draw, t.r_s + z + tbs, t.r_e + z - tbs, t.c_s + tbs, t.c_e - tbs, (0, 0, 0))
- tile_border(draw_orig, t.r_s + z + tbs, t.r_e + z - tbs, t.c_s + tbs, t.c_e - tbs, (0, 0, 0))
-
- summary_title = "Slide %03d Top Tile Summary:" % slide_num
- summary_txt = summary_title + "\n" + summary_stats(tile_sum)
-
- summary_font = ImageFont.truetype(SUMMARY_TITLE_FONT_PATH, size=SUMMARY_TITLE_TEXT_SIZE)
- draw.text((5, 5), summary_txt, SUMMARY_TITLE_TEXT_COLOR, font=summary_font)
- draw_orig.text((5, 5), summary_txt, SUMMARY_TITLE_TEXT_COLOR, font=summary_font)
-
- tiles_to_label = tile_sum.tiles if label_all_tiles else top_tiles
- h_offset = TILE_BORDER_SIZE + 2
- v_offset = TILE_BORDER_SIZE
- h_ds_offset = TILE_BORDER_SIZE + 3
- v_ds_offset = TILE_BORDER_SIZE + 1
- for t in tiles_to_label:
- label = "R%d\nC%d" % (t.r, t.c)
- font = ImageFont.truetype(FONT_PATH, size=TILE_LABEL_TEXT_SIZE)
- # drop shadow behind text
- draw.text(((t.c_s + h_ds_offset), (t.r_s + v_ds_offset + z)), label, (0, 0, 0), font=font)
- draw_orig.text(((t.c_s + h_ds_offset), (t.r_s + v_ds_offset + z)), label, (0, 0, 0), font=font)
-
- draw.text(((t.c_s + h_offset), (t.r_s + v_offset + z)), label, SUMMARY_TILE_TEXT_COLOR, font=font)
- draw_orig.text(((t.c_s + h_offset), (t.r_s + v_offset + z)), label, SUMMARY_TILE_TEXT_COLOR, font=font)
-
- if show_top_stats:
- summary = add_tile_stats_to_top_tile_summary(summary, top_tiles, z)
- summary_orig = add_tile_stats_to_top_tile_summary(summary_orig, top_tiles, z)
-
- if display:
- summary.show()
- summary_orig.show()
- if save_summary:
- save_top_tiles_image(summary, slide_num)
- save_top_tiles_on_original_image(summary_orig, slide_num)
-
-
-def add_tile_stats_to_top_tile_summary(pil_img, tiles, z):
- np_sum = util.pil_to_np_rgb(pil_img)
- sum_r, sum_c, sum_ch = np_sum.shape
- np_stats = np_tile_stat_img(tiles)
- st_r, st_c, _ = np_stats.shape
- combo_c = sum_c + st_c
- combo_r = max(sum_r, st_r + z)
- combo = np.zeros([combo_r, combo_c, sum_ch], dtype=np.uint8)
- combo.fill(255)
- combo[0:sum_r, 0:sum_c] = np_sum
- combo[z:st_r + z, sum_c:sum_c + st_c] = np_stats
- result = util.np_to_pil(combo)
- return result
-
-
-def np_tile_stat_img(tiles):
- """
- Generate tile scoring statistics for a list of tiles and return the result as a NumPy array image.
-
- Args:
- tiles: List of tiles (such as top tiles)
-
- Returns:
- Tile scoring statistics converted into an NumPy array image.
- """
- tt = sorted(tiles, key=lambda t: (t.r, t.c), reverse=False)
- tile_stats = "Tile Score Statistics:\n"
- count = 0
- for t in tt:
- if count > 0:
- tile_stats += "\n"
- count += 1
- tup = (t.r, t.c, t.rank, t.tissue_percentage, t.color_factor, t.s_and_v_factor, t.quantity_factor, t.score)
- tile_stats += "R%03d C%03d #%003d TP:%6.2f%% CF:%4.0f SVF:%4.2f QF:%4.2f S:%0.4f" % tup
- np_stats = np_text(tile_stats, font_path=SUMMARY_TITLE_FONT_PATH, font_size=14)
- return np_stats
-
-
-def tile_border_color(tissue_percentage):
- """
- Obtain the corresponding tile border color for a particular tile tissue percentage.
-
- Args:
- tissue_percentage: The tile tissue percentage
-
- Returns:
- The tile border color corresponding to the tile tissue percentage.
- """
- if tissue_percentage >= TISSUE_HIGH_THRESH:
- border_color = HIGH_COLOR
- elif (tissue_percentage >= TISSUE_LOW_THRESH) and (tissue_percentage < TISSUE_HIGH_THRESH):
- border_color = MEDIUM_COLOR
- elif (tissue_percentage > 0) and (tissue_percentage < TISSUE_LOW_THRESH):
- border_color = LOW_COLOR
- else:
- border_color = NONE_COLOR
- return border_color
-
-
-def faded_tile_border_color(tissue_percentage):
- """
- Obtain the corresponding faded tile border color for a particular tile tissue percentage.
-
- Args:
- tissue_percentage: The tile tissue percentage
-
- Returns:
- The faded tile border color corresponding to the tile tissue percentage.
- """
- if tissue_percentage >= TISSUE_HIGH_THRESH:
- border_color = FADED_THRESH_COLOR
- elif (tissue_percentage >= TISSUE_LOW_THRESH) and (tissue_percentage < TISSUE_HIGH_THRESH):
- border_color = FADED_MEDIUM_COLOR
- elif (tissue_percentage > 0) and (tissue_percentage < TISSUE_LOW_THRESH):
- border_color = FADED_LOW_COLOR
- else:
- border_color = FADED_NONE_COLOR
- return border_color
-
-
-def summary_title(tile_summary):
- """
- Obtain tile summary title.
-
- Args:
- tile_summary: TileSummary object.
-
- Returns:
- The tile summary title.
- """
- return "Slide %03d Tile Summary:" % tile_summary.slide_num
-
-
-def summary_stats(tile_summary):
- """
- Obtain various stats about the slide tiles.
-
- Args:
- tile_summary: TileSummary object.
-
- Returns:
- Various stats about the slide tiles as a string.
- """
- return "Original Dimensions: %dx%d\n" % (tile_summary.orig_w, tile_summary.orig_h) + \
- "Original Tile Size: %dx%d\n" % (tile_summary.orig_tile_w, tile_summary.orig_tile_h) + \
- "Scale Factor: 1/%dx\n" % tile_summary.scale_factor + \
- "Scaled Dimensions: %dx%d\n" % (tile_summary.scaled_w, tile_summary.scaled_h) + \
- "Scaled Tile Size: %dx%d\n" % (tile_summary.scaled_tile_w, tile_summary.scaled_tile_w) + \
- "Total Mask: %3.2f%%, Total Tissue: %3.2f%%\n" % (
- tile_summary.mask_percentage(), tile_summary.tissue_percentage) + \
- "Tiles: %dx%d = %d\n" % (tile_summary.num_col_tiles, tile_summary.num_row_tiles, tile_summary.count) + \
- " %5d (%5.2f%%) tiles >=%d%% tissue\n" % (
- tile_summary.high, tile_summary.high / tile_summary.count * 100, TISSUE_HIGH_THRESH) + \
- " %5d (%5.2f%%) tiles >=%d%% and <%d%% tissue\n" % (
- tile_summary.medium, tile_summary.medium / tile_summary.count * 100, TISSUE_LOW_THRESH,
- TISSUE_HIGH_THRESH) + \
- " %5d (%5.2f%%) tiles >0%% and <%d%% tissue\n" % (
- tile_summary.low, tile_summary.low / tile_summary.count * 100, TISSUE_LOW_THRESH) + \
- " %5d (%5.2f%%) tiles =0%% tissue" % (tile_summary.none, tile_summary.none / tile_summary.count * 100)
-
-
-def tile_border(draw, r_s, r_e, c_s, c_e, color, border_size=TILE_BORDER_SIZE):
- """
- Draw a border around a tile with width TILE_BORDER_SIZE.
-
- Args:
- draw: Draw object for drawing on PIL image.
- r_s: Row starting pixel.
- r_e: Row ending pixel.
- c_s: Column starting pixel.
- c_e: Column ending pixel.
- color: Color of the border.
- border_size: Width of tile border in pixels.
- """
- for x in range(0, border_size):
- draw.rectangle([(c_s + x, r_s + x), (c_e - 1 - x, r_e - 1 - x)], outline=color)
-
-
-def save_tile_summary_image(pil_img, slide_num):
- """
- Save a tile summary image and thumbnail to the file system.
-
- Args:
- pil_img: Image as a PIL Image.
- slide_num: The slide number.
- """
- t = Time()
- filepath = slide.get_tile_summary_image_path(slide_num)
- pil_img.save(filepath)
- print("%-20s | Time: %-14s Name: %s" % ("Save Tile Sum", str(t.elapsed()), filepath))
-
- t = Time()
- thumbnail_filepath = slide.get_tile_summary_thumbnail_path(slide_num)
- slide.save_thumbnail(pil_img, slide.THUMBNAIL_SIZE, thumbnail_filepath)
- print("%-20s | Time: %-14s Name: %s" % ("Save Tile Sum Thumb", str(t.elapsed()), thumbnail_filepath))
-
-
-def save_top_tiles_image(pil_img, slide_num):
- """
- Save a top tiles image and thumbnail to the file system.
-
- Args:
- pil_img: Image as a PIL Image.
- slide_num: The slide number.
- """
- t = Time()
- filepath = slide.get_top_tiles_image_path(slide_num)
- pil_img.save(filepath)
- print("%-20s | Time: %-14s Name: %s" % ("Save Top Tiles Image", str(t.elapsed()), filepath))
-
- t = Time()
- thumbnail_filepath = slide.get_top_tiles_thumbnail_path(slide_num)
- slide.save_thumbnail(pil_img, slide.THUMBNAIL_SIZE, thumbnail_filepath)
- print("%-20s | Time: %-14s Name: %s" % ("Save Top Tiles Thumb", str(t.elapsed()), thumbnail_filepath))
-
-
-def save_tile_summary_on_original_image(pil_img, slide_num):
- """
- Save a tile summary on original image and thumbnail to the file system.
-
- Args:
- pil_img: Image as a PIL Image.
- slide_num: The slide number.
- """
- t = Time()
- filepath = slide.get_tile_summary_on_original_image_path(slide_num)
- pil_img.save(filepath)
- print("%-20s | Time: %-14s Name: %s" % ("Save Tile Sum Orig", str(t.elapsed()), filepath))
-
- t = Time()
- thumbnail_filepath = slide.get_tile_summary_on_original_thumbnail_path(slide_num)
- slide.save_thumbnail(pil_img, slide.THUMBNAIL_SIZE, thumbnail_filepath)
- print(
- "%-20s | Time: %-14s Name: %s" % ("Save Tile Sum Orig T", str(t.elapsed()), thumbnail_filepath))
-
-
-def save_top_tiles_on_original_image(pil_img, slide_num):
- """
- Save a top tiles on original image and thumbnail to the file system.
-
- Args:
- pil_img: Image as a PIL Image.
- slide_num: The slide number.
- """
- t = Time()
- filepath = slide.get_top_tiles_on_original_image_path(slide_num)
- pil_img.save(filepath)
- print("%-20s | Time: %-14s Name: %s" % ("Save Top Orig", str(t.elapsed()), filepath))
-
- t = Time()
- thumbnail_filepath = slide.get_top_tiles_on_original_thumbnail_path(slide_num)
- slide.save_thumbnail(pil_img, slide.THUMBNAIL_SIZE, thumbnail_filepath)
- print(
- "%-20s | Time: %-14s Name: %s" % ("Save Top Orig Thumb", str(t.elapsed()), thumbnail_filepath))
-
-
-def summary_and_tiles(slide_num, display=True, save_summary=False, save_data=True, save_top_tiles=True):
- """
- Generate tile summary and top tiles for slide.
-
- Args:
- slide_num: The slide number.
- display: If True, display tile summary to screen.
- save_summary: If True, save tile summary images.
- save_data: If True, save tile data to csv file.
- save_top_tiles: If True, save top tiles to files.
-
- """
- img_path = slide.get_filter_image_result(slide_num)
- np_img = slide.open_image_np(img_path)
-
- tile_sum = score_tiles(slide_num, np_img)
- if save_data:
- save_tile_data(tile_sum)
- generate_tile_summaries(tile_sum, np_img, display=display, save_summary=save_summary)
- generate_top_tile_summaries(tile_sum, np_img, display=display, save_summary=save_summary)
- if save_top_tiles:
- for tile in tile_sum.top_tiles():
- tile.save_tile()
- return tile_sum
-
-
-def save_tile_data(tile_summary):
- """
- Save tile data to csv file.
-
- Args
- tile_summary: TimeSummary object.
- """
-
- time = Time()
-
- csv = summary_title(tile_summary) + "\n" + summary_stats(tile_summary)
-
- csv += "\n\n\nTile Num,Row,Column,Tissue %,Tissue Quantity,Col Start,Row Start,Col End,Row End,Col Size,Row Size," + \
- "Original Col Start,Original Row Start,Original Col End,Original Row End,Original Col Size,Original Row Size," + \
- "Color Factor,S and V Factor,Quantity Factor,Score\n"
-
- for t in tile_summary.tiles:
- line = "%d,%d,%d,%4.2f,%s,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%4.0f,%4.2f,%4.2f,%0.4f\n" % (
- t.tile_num, t.r, t.c, t.tissue_percentage, t.tissue_quantity().name, t.c_s, t.r_s, t.c_e, t.r_e, t.c_e - t.c_s,
- t.r_e - t.r_s, t.o_c_s, t.o_r_s, t.o_c_e, t.o_r_e, t.o_c_e - t.o_c_s, t.o_r_e - t.o_r_s, t.color_factor,
- t.s_and_v_factor, t.quantity_factor, t.score)
- csv += line
-
- data_path = slide.get_tile_data_path(tile_summary.slide_num)
- csv_file = open(data_path, "w")
- csv_file.write(csv)
- csv_file.close()
-
- print("%-20s | Time: %-14s Name: %s" % ("Save Tile Data", str(time.elapsed()), data_path))
-
-
-def tile_to_pil_tile(tile):
- """
- Convert tile information into the corresponding tile as a PIL image read from the whole-slide image file.
-
- Args:
- tile: Tile object.
-
- Return:
- Tile as a PIL image.
- """
- t = tile
- slide_filepath = slide.get_training_slide_path(t.slide_num)
- s = slide.open_slide(slide_filepath)
-
- x, y = t.o_c_s, t.o_r_s
- w, h = t.o_c_e - t.o_c_s, t.o_r_e - t.o_r_s
- tile_region = s.read_region((x, y), 0, (w, h))
- # RGBA to RGB
- pil_img = tile_region.convert("RGB")
- return pil_img
-
-
-def tile_to_np_tile(tile):
- """
- Convert tile information into the corresponding tile as a NumPy image read from the whole-slide image file.
-
- Args:
- tile: Tile object.
-
- Return:
- Tile as a NumPy image.
- """
- pil_img = tile_to_pil_tile(tile)
- np_img = util.pil_to_np_rgb(pil_img)
- return np_img
-
-
-def save_display_tile(tile, save=True, display=False):
- """
- Save and/or display a tile image.
-
- Args:
- tile: Tile object.
- save: If True, save tile image.
- display: If True, dispaly tile image.
- """
- tile_pil_img = tile_to_pil_tile(tile)
-
- if save:
- t = Time()
- img_path = slide.get_tile_image_path(tile)
- dir = os.path.dirname(img_path)
- if not os.path.exists(dir):
- os.makedirs(dir)
- tile_pil_img.save(img_path)
- print("%-20s | Time: %-14s Name: %s" % ("Save Tile", str(t.elapsed()), img_path))
-
- if display:
- tile_pil_img.show()
-
-
-def score_tiles(slide_num, np_img=None, dimensions=None, small_tile_in_tile=False):
- """
- Score all tiles for a slide and return the results in a TileSummary object.
-
- Args:
- slide_num: The slide number.
- np_img: Optional image as a NumPy array.
- dimensions: Optional tuple consisting of (original width, original height, new width, new height). Used for dynamic
- tile retrieval.
- small_tile_in_tile: If True, include the small NumPy image in the Tile objects.
-
- Returns:
- TileSummary object which includes a list of Tile objects containing information about each tile.
- """
- if dimensions is None:
- img_path = slide.get_filter_image_result(slide_num)
- o_w, o_h, w, h = slide.parse_dimensions_from_image_filename(img_path)
- else:
- o_w, o_h, w, h = dimensions
-
- if np_img is None:
- np_img = slide.open_image_np(img_path)
-
- row_tile_size = round(ROW_TILE_SIZE / slide.SCALE_FACTOR) # use round?
- col_tile_size = round(COL_TILE_SIZE / slide.SCALE_FACTOR) # use round?
-
- num_row_tiles, num_col_tiles = get_num_tiles(h, w, row_tile_size, col_tile_size)
-
- tile_sum = TileSummary(slide_num=slide_num,
- orig_w=o_w,
- orig_h=o_h,
- orig_tile_w=COL_TILE_SIZE,
- orig_tile_h=ROW_TILE_SIZE,
- scaled_w=w,
- scaled_h=h,
- scaled_tile_w=col_tile_size,
- scaled_tile_h=row_tile_size,
- tissue_percentage=filter.tissue_percent(np_img),
- num_col_tiles=num_col_tiles,
- num_row_tiles=num_row_tiles)
-
- count = 0
- high = 0
- medium = 0
- low = 0
- none = 0
- tile_indices = get_tile_indices(h, w, row_tile_size, col_tile_size)
- for t in tile_indices:
- count += 1 # tile_num
- r_s, r_e, c_s, c_e, r, c = t
- np_tile = np_img[r_s:r_e, c_s:c_e]
- t_p = filter.tissue_percent(np_tile)
- amount = tissue_quantity(t_p)
- if amount == TissueQuantity.HIGH:
- high += 1
- elif amount == TissueQuantity.MEDIUM:
- medium += 1
- elif amount == TissueQuantity.LOW:
- low += 1
- elif amount == TissueQuantity.NONE:
- none += 1
- o_c_s, o_r_s = slide.small_to_large_mapping((c_s, r_s), (o_w, o_h))
- o_c_e, o_r_e = slide.small_to_large_mapping((c_e, r_e), (o_w, o_h))
-
- # pixel adjustment in case tile dimension too large (for example, 1025 instead of 1024)
- if (o_c_e - o_c_s) > COL_TILE_SIZE:
- o_c_e -= 1
- if (o_r_e - o_r_s) > ROW_TILE_SIZE:
- o_r_e -= 1
-
- score, color_factor, s_and_v_factor, quantity_factor = score_tile(np_tile, t_p, slide_num, r, c)
-
- np_scaled_tile = np_tile if small_tile_in_tile else None
- tile = Tile(tile_sum, slide_num, np_scaled_tile, count, r, c, r_s, r_e, c_s, c_e, o_r_s, o_r_e, o_c_s,
- o_c_e, t_p, color_factor, s_and_v_factor, quantity_factor, score)
- tile_sum.tiles.append(tile)
-
- tile_sum.count = count
- tile_sum.high = high
- tile_sum.medium = medium
- tile_sum.low = low
- tile_sum.none = none
-
- tiles_by_score = tile_sum.tiles_by_score()
- rank = 0
- for t in tiles_by_score:
- rank += 1
- t.rank = rank
-
- return tile_sum
-
-
-def score_tile(np_tile, tissue_percent, slide_num, row, col):
- """
- Score tile based on tissue percentage, color factor, saturation/value factor, and tissue quantity factor.
-
- Args:
- np_tile: Tile as NumPy array.
- tissue_percent: The percentage of the tile judged to be tissue.
- slide_num: Slide number.
- row: Tile row.
- col: Tile column.
-
- Returns tuple consisting of score, color factor, saturation/value factor, and tissue quantity factor.
- """
- color_factor = hsv_purple_pink_factor(np_tile)
- s_and_v_factor = hsv_saturation_and_value_factor(np_tile)
- amount = tissue_quantity(tissue_percent)
- quantity_factor = tissue_quantity_factor(amount)
- combined_factor = color_factor * s_and_v_factor * quantity_factor
- score = (tissue_percent ** 2) * np.log(1 + combined_factor) / 1000.0
- # scale score to between 0 and 1
- score = 1.0 - (10.0 / (10.0 + score))
- return score, color_factor, s_and_v_factor, quantity_factor
-
-
-def tissue_quantity_factor(amount):
- """
- Obtain a scoring factor based on the quantity of tissue in a tile.
-
- Args:
- amount: Tissue amount as a TissueQuantity enum value.
-
- Returns:
- Scoring factor based on the tile tissue quantity.
- """
- if amount == TissueQuantity.HIGH:
- quantity_factor = 1.0
- elif amount == TissueQuantity.MEDIUM:
- quantity_factor = 0.2
- elif amount == TissueQuantity.LOW:
- quantity_factor = 0.1
- else:
- quantity_factor = 0.0
- return quantity_factor
-
-
-def tissue_quantity(tissue_percentage):
- """
- Obtain TissueQuantity enum member (HIGH, MEDIUM, LOW, or NONE) for corresponding tissue percentage.
-
- Args:
- tissue_percentage: The tile tissue percentage.
-
- Returns:
- TissueQuantity enum member (HIGH, MEDIUM, LOW, or NONE).
- """
- if tissue_percentage >= TISSUE_HIGH_THRESH:
- return TissueQuantity.HIGH
- elif (tissue_percentage >= TISSUE_LOW_THRESH) and (tissue_percentage < TISSUE_HIGH_THRESH):
- return TissueQuantity.MEDIUM
- elif (tissue_percentage > 0) and (tissue_percentage < TISSUE_LOW_THRESH):
- return TissueQuantity.LOW
- else:
- return TissueQuantity.NONE
-
-
-def image_list_to_tiles(image_num_list, display=False, save_summary=True, save_data=True, save_top_tiles=True):
- """
- Generate tile summaries and tiles for a list of images.
-
- Args:
- image_num_list: List of image numbers.
- display: If True, display tile summary images to screen.
- save_summary: If True, save tile summary images.
- save_data: If True, save tile data to csv file.
- save_top_tiles: If True, save top tiles to files.
- """
- tile_summaries_dict = dict()
- for slide_num in image_num_list:
- tile_summary = summary_and_tiles(slide_num, display, save_summary, save_data, save_top_tiles)
- tile_summaries_dict[slide_num] = tile_summary
- return image_num_list, tile_summaries_dict
-
-
-def image_range_to_tiles(start_ind, end_ind, display=False, save_summary=True, save_data=True, save_top_tiles=True):
- """
- Generate tile summaries and tiles for a range of images.
-
- Args:
- start_ind: Starting index (inclusive).
- end_ind: Ending index (inclusive).
- display: If True, display tile summary images to screen.
- save_summary: If True, save tile summary images.
- save_data: If True, save tile data to csv file.
- save_top_tiles: If True, save top tiles to files.
- """
- image_num_list = list()
- tile_summaries_dict = dict()
- for slide_num in range(start_ind, end_ind + 1):
- tile_summary = summary_and_tiles(slide_num, display, save_summary, save_data, save_top_tiles)
- image_num_list.append(slide_num)
- tile_summaries_dict[slide_num] = tile_summary
- return image_num_list, tile_summaries_dict
-
-
-def singleprocess_filtered_images_to_tiles(display=False, save_summary=True, save_data=True, save_top_tiles=True,
- html=True, image_num_list=None):
- """
- Generate tile summaries and tiles for training images using a single process.
-
- Args:
- display: If True, display tile summary images to screen.
- save_summary: If True, save tile summary images.
- save_data: If True, save tile data to csv file.
- save_top_tiles: If True, save top tiles to files.
- html: If True, generate HTML page to display tiled images
- image_num_list: Optionally specify a list of image slide numbers.
- """
- t = Time()
- print("Generating tile summaries\n")
-
- if image_num_list is not None:
- image_num_list, tile_summaries_dict = image_list_to_tiles(image_num_list, display, save_summary, save_data,
- save_top_tiles)
- else:
- num_training_slides = slide.get_num_training_slides()
- image_num_list, tile_summaries_dict = image_range_to_tiles(1, num_training_slides, display, save_summary, save_data,
- save_top_tiles)
-
- print("Time to generate tile summaries: %s\n" % str(t.elapsed()))
-
- if html:
- generate_tiled_html_result(image_num_list, tile_summaries_dict, save_data)
-
-
-def multiprocess_filtered_images_to_tiles(display=False, save_summary=True, save_data=True, save_top_tiles=True,
- html=True, image_num_list=None):
- """
- Generate tile summaries and tiles for all training images using multiple processes (one process per core).
-
- Args:
- display: If True, display images to screen (multiprocessed display not recommended).
- save_summary: If True, save tile summary images.
- save_data: If True, save tile data to csv file.
- save_top_tiles: If True, save top tiles to files.
- html: If True, generate HTML page to display tiled images.
- image_num_list: Optionally specify a list of image slide numbers.
- """
- timer = Time()
- print("Generating tile summaries (multiprocess)\n")
-
- if save_summary and not os.path.exists(slide.TILE_SUMMARY_DIR):
- os.makedirs(slide.TILE_SUMMARY_DIR)
-
- # how many processes to use
- num_processes = multiprocessing.cpu_count()
- pool = multiprocessing.Pool(num_processes)
-
- if image_num_list is not None:
- num_train_images = len(image_num_list)
- else:
- num_train_images = slide.get_num_training_slides()
- if num_processes > num_train_images:
- num_processes = num_train_images
- images_per_process = num_train_images / num_processes
-
- print("Number of processes: " + str(num_processes))
- print("Number of training images: " + str(num_train_images))
-
- tasks = []
- for num_process in range(1, num_processes + 1):
- start_index = (num_process - 1) * images_per_process + 1
- end_index = num_process * images_per_process
- start_index = int(start_index)
- end_index = int(end_index)
- if image_num_list is not None:
- sublist = image_num_list[start_index - 1:end_index]
- tasks.append((sublist, display, save_summary, save_data, save_top_tiles))
- print("Task #" + str(num_process) + ": Process slides " + str(sublist))
- else:
- tasks.append((start_index, end_index, display, save_summary, save_data, save_top_tiles))
- if start_index == end_index:
- print("Task #" + str(num_process) + ": Process slide " + str(start_index))
- else:
- print("Task #" + str(num_process) + ": Process slides " + str(start_index) + " to " + str(end_index))
-
- # start tasks
- results = []
- for t in tasks:
- if image_num_list is not None:
- results.append(pool.apply_async(image_list_to_tiles, t))
- else:
- results.append(pool.apply_async(image_range_to_tiles, t))
-
- slide_nums = list()
- tile_summaries_dict = dict()
- for result in results:
- image_nums, tile_summaries = result.get()
- slide_nums.extend(image_nums)
- tile_summaries_dict.update(tile_summaries)
- print("Done tiling slides: %s" % image_nums)
-
- if html:
- generate_tiled_html_result(slide_nums, tile_summaries_dict, save_data)
-
- print("Time to generate tile previews (multiprocess): %s\n" % str(timer.elapsed()))
-
-
-def image_row(slide_num, tile_summary, data_link):
- """
- Generate HTML for viewing a tiled image.
-
- Args:
- slide_num: The slide number.
- tile_summary: TileSummary object.
- data_link: If True, add link to tile data csv file.
-
- Returns:
- HTML table row for viewing a tiled image.
- """
- orig_img = slide.get_training_image_path(slide_num)
- orig_thumb = slide.get_training_thumbnail_path(slide_num)
- filt_img = slide.get_filter_image_result(slide_num)
- filt_thumb = slide.get_filter_thumbnail_result(slide_num)
- sum_img = slide.get_tile_summary_image_path(slide_num)
- sum_thumb = slide.get_tile_summary_thumbnail_path(slide_num)
- osum_img = slide.get_tile_summary_on_original_image_path(slide_num)
- osum_thumb = slide.get_tile_summary_on_original_thumbnail_path(slide_num)
- top_img = slide.get_top_tiles_image_path(slide_num)
- top_thumb = slide.get_top_tiles_thumbnail_path(slide_num)
- otop_img = slide.get_top_tiles_on_original_image_path(slide_num)
- otop_thumb = slide.get_top_tiles_on_original_thumbnail_path(slide_num)
- html = "
\n" + \
- " \n" + \
- " S%03d Original \n" % (orig_img, slide_num) + \
- " \n" % (orig_thumb) + \
- " \n" + \
- " | \n" + \
- " \n" + \
- " S%03d Filtered \n" % (filt_img, slide_num) + \
- " \n" % (filt_thumb) + \
- " \n" + \
- " | \n"
-
- html += " \n" + \
- " S%03d Tiles \n" % (sum_img, slide_num) + \
- " \n" % (sum_thumb) + \
- " \n" + \
- " | \n"
-
- html += " \n" + \
- " S%03d Tiles \n" % (osum_img, slide_num) + \
- " \n" % (osum_thumb) + \
- " \n" + \
- " | \n"
-
- html += " \n"
- if data_link:
- html += " S%03d Tile Summary\n" % slide_num + \
- " ( Data) \n" % slide.get_tile_data_path(slide_num)
- else:
- html += " S%03d Tile Summary \n" % slide_num
-
- html += " \n" + \
- " %s\n" % summary_stats(tile_summary).replace("\n", " \n ") + \
- " \n" + \
- " | \n"
-
- html += " \n" + \
- " S%03d Top Tiles \n" % (top_img, slide_num) + \
- " \n" % (top_thumb) + \
- " \n" + \
- " | \n"
-
- html += " \n" + \
- " S%03d Top Tiles \n" % (otop_img, slide_num) + \
- " \n" % (otop_thumb) + \
- " \n" + \
- " | \n"
-
- top_tiles = tile_summary.top_tiles()
- num_tiles = len(top_tiles)
- score_num = 0
- for t in top_tiles:
- score_num += 1
- t.tile_num = score_num
- # sort top tiles by rows and columns to make them easier to locate on HTML page
- top_tiles = sorted(top_tiles, key=lambda t: (t.r, t.c), reverse=False)
-
- html += " \n" + \
- " S%03d Top %d Tile Scores \n" % (slide_num, num_tiles) + \
- " \n"
-
- html += " \n"
- MAX_TILES_PER_ROW = 15
- num_cols = math.ceil(num_tiles / MAX_TILES_PER_ROW)
- num_rows = num_tiles if num_tiles < MAX_TILES_PER_ROW else MAX_TILES_PER_ROW
- for row in range(num_rows):
- html += " \n"
- for col in range(num_cols):
- html += " | "
- tile_num = row + (col * num_rows) + 1
- if tile_num <= num_tiles:
- t = top_tiles[tile_num - 1]
- label = "R%03d C%03d %0.4f (#%02d)" % (t.r, t.c, t.score, t.tile_num)
- tile_img_path = slide.get_tile_image_path(t)
- html += "%s" % (tile_img_path, label)
- else:
- html += " "
- html += " | \n"
- html += " \n"
- html += " \n"
-
- html += " \n"
- html += " | \n"
-
- html += "
\n"
- return html
-
-
-def generate_tiled_html_result(slide_nums, tile_summaries_dict, data_link):
- """
- Generate HTML to view the tiled images.
-
- Args:
- slide_nums: List of slide numbers.
- tile_summaries_dict: Dictionary of TileSummary objects keyed by slide number.
- data_link: If True, add link to tile data csv file.
- """
- slide_nums = sorted(slide_nums)
- if not slide.TILE_SUMMARY_PAGINATE:
- html = ""
- html += filter.html_header("Tiles")
-
- html += " \n"
- for slide_num in slide_nums:
- html += image_row(slide_num, data_link)
- html += "
\n"
-
- html += filter.html_footer()
- text_file = open(os.path.join(slide.TILE_SUMMARY_HTML_DIR, "tiles.html"), "w")
- text_file.write(html)
- text_file.close()
- else:
- total_len = len(slide_nums)
- page_size = slide.TILE_SUMMARY_PAGINATION_SIZE
- num_pages = math.ceil(total_len / page_size)
- for page_num in range(1, num_pages + 1):
- start_index = (page_num - 1) * page_size
- end_index = (page_num * page_size) if (page_num < num_pages) else total_len
- page_slide_nums = slide_nums[start_index:end_index]
-
- html = ""
- html += filter.html_header("Tiles, Page %d" % page_num)
-
- html += " "
- if page_num > 1:
- if page_num == 2:
- html += "
< "
- else:
- html += "
< " % (page_num - 1)
- html += "Page %d" % page_num
- if page_num < num_pages:
- html += "
> " % (page_num + 1)
- html += "
\n"
-
- html += " \n"
- for slide_num in page_slide_nums:
- tile_summary = tile_summaries_dict[slide_num]
- html += image_row(slide_num, tile_summary, data_link)
- html += "
\n"
-
- html += filter.html_footer()
- if page_num == 1:
- text_file = open(os.path.join(slide.TILE_SUMMARY_HTML_DIR, "tiles.html"), "w")
- else:
- text_file = open(os.path.join(slide.TILE_SUMMARY_HTML_DIR, "tiles-%d.html" % page_num), "w")
- text_file.write(html)
- text_file.close()
-
-
-def np_hsv_hue_histogram(h):
- """
- Create Matplotlib histogram of hue values for an HSV image and return the histogram as a NumPy array image.
-
- Args:
- h: Hue values as a 1-dimensional int NumPy array (scaled 0 to 360)
-
- Returns:
- Matplotlib histogram of hue values converted to a NumPy array image.
- """
- figure = plt.figure()
- canvas = figure.canvas
- _, _, patches = plt.hist(h, bins=360)
- plt.title("HSV Hue Histogram, mean=%3.1f, std=%3.1f" % (np.mean(h), np.std(h)))
-
- bin_num = 0
- for patch in patches:
- rgb_color = colorsys.hsv_to_rgb(bin_num / 360.0, 1, 1)
- patch.set_facecolor(rgb_color)
- bin_num += 1
-
- canvas.draw()
- w, h = canvas.get_width_height()
- np_hist = np.fromstring(canvas.get_renderer().tostring_rgb(), dtype=np.uint8).reshape(h, w, 3)
- plt.close(figure)
- util.np_info(np_hist)
- return np_hist
-
-
-def np_histogram(data, title, bins="auto"):
- """
- Create Matplotlib histogram and return it as a NumPy array image.
-
- Args:
- data: Data to plot in the histogram.
- title: Title of the histogram.
- bins: Number of histogram bins, "auto" by default.
-
- Returns:
- Matplotlib histogram as a NumPy array image.
- """
- figure = plt.figure()
- canvas = figure.canvas
- plt.hist(data, bins=bins)
- plt.title(title)
-
- canvas.draw()
- w, h = canvas.get_width_height()
- np_hist = np.fromstring(canvas.get_renderer().tostring_rgb(), dtype=np.uint8).reshape(h, w, 3)
- plt.close(figure)
- util.np_info(np_hist)
- return np_hist
-
-
-def np_hsv_saturation_histogram(s):
- """
- Create Matplotlib histogram of saturation values for an HSV image and return the histogram as a NumPy array image.
-
- Args:
- s: Saturation values as a 1-dimensional float NumPy array
-
- Returns:
- Matplotlib histogram of saturation values converted to a NumPy array image.
- """
- title = "HSV Saturation Histogram, mean=%.2f, std=%.2f" % (np.mean(s), np.std(s))
- return np_histogram(s, title)
-
-
-def np_hsv_value_histogram(v):
- """
- Create Matplotlib histogram of value values for an HSV image and return the histogram as a NumPy array image.
-
- Args:
- v: Value values as a 1-dimensional float NumPy array
-
- Returns:
- Matplotlib histogram of saturation values converted to a NumPy array image.
- """
- title = "HSV Value Histogram, mean=%.2f, std=%.2f" % (np.mean(v), np.std(v))
- return np_histogram(v, title)
-
-
-def np_rgb_channel_histogram(rgb, ch_num, ch_name):
- """
- Create Matplotlib histogram of an RGB channel for an RGB image and return the histogram as a NumPy array image.
-
- Args:
- rgb: Image as RGB NumPy array.
- ch_num: Which channel (0=red, 1=green, 2=blue)
- ch_name: Channel name ("R", "G", "B")
-
- Returns:
- Matplotlib histogram of RGB channel converted to a NumPy array image.
- """
-
- ch = rgb[:, :, ch_num]
- ch = ch.flatten()
- title = "RGB %s Histogram, mean=%.2f, std=%.2f" % (ch_name, np.mean(ch), np.std(ch))
- return np_histogram(ch, title, bins=256)
-
-
-def np_rgb_r_histogram(rgb):
- """
- Obtain RGB R channel histogram as a NumPy array image.
-
- Args:
- rgb: Image as RGB NumPy array.
-
- Returns:
- Histogram of RGB R channel as a NumPy array image.
- """
- hist = np_rgb_channel_histogram(rgb, 0, "R")
- return hist
-
-
-def np_rgb_g_histogram(rgb):
- """
- Obtain RGB G channel histogram as a NumPy array image.
-
- Args:
- rgb: Image as RGB NumPy array.
-
- Returns:
- Histogram of RGB G channel as a NumPy array image.
- """
- hist = np_rgb_channel_histogram(rgb, 1, "G")
- return hist
-
-
-def np_rgb_b_histogram(rgb):
- """
- Obtain RGB B channel histogram as a NumPy array image.
-
- Args:
- rgb: Image as RGB NumPy array.
-
- Returns:
- Histogram of RGB B channel as a NumPy array image.
- """
- hist = np_rgb_channel_histogram(rgb, 2, "B")
- return hist
-
-
-def pil_hue_histogram(h):
- """
- Create Matplotlib histogram of hue values for an HSV image and return the histogram as a PIL image.
-
- Args:
- h: Hue values as a 1-dimensional int NumPy array (scaled 0 to 360)
-
- Returns:
- Matplotlib histogram of hue values converted to a PIL image.
- """
- np_hist = np_hsv_hue_histogram(h)
- pil_hist = util.np_to_pil(np_hist)
- return pil_hist
-
-
-def display_image_with_hsv_hue_histogram(np_rgb, text=None, scale_up=False):
- """
- Display an image with its corresponding hue histogram.
-
- Args:
- np_rgb: RGB image tile as a NumPy array
- text: Optional text to display above image
- scale_up: If True, scale up image to display by slide.SCALE_FACTOR
- """
- hsv = filter.filter_rgb_to_hsv(np_rgb)
- h = filter.filter_hsv_to_h(hsv)
- np_hist = np_hsv_hue_histogram(h)
- hist_r, hist_c, _ = np_hist.shape
-
- if scale_up:
- np_rgb = np.repeat(np_rgb, slide.SCALE_FACTOR, axis=1)
- np_rgb = np.repeat(np_rgb, slide.SCALE_FACTOR, axis=0)
-
- img_r, img_c, img_ch = np_rgb.shape
- if text is not None:
- np_t = np_text(text)
- t_r, t_c, _ = np_t.shape
- t_i_c = max(t_c, img_c)
- t_i_r = t_r + img_r
- t_i = np.zeros([t_i_r, t_i_c, img_ch], dtype=np.uint8)
- t_i.fill(255)
- t_i[0:t_r, 0:t_c] = np_t
- t_i[t_r:t_r + img_r, 0:img_c] = np_rgb
- np_rgb = t_i # for simplicity assign title+image to image
- img_r, img_c, img_ch = np_rgb.shape
-
- r = max(img_r, hist_r)
- c = img_c + hist_c
- combo = np.zeros([r, c, img_ch], dtype=np.uint8)
- combo.fill(255)
- combo[0:img_r, 0:img_c] = np_rgb
- combo[0:hist_r, img_c:c] = np_hist
- pil_combo = util.np_to_pil(combo)
- pil_combo.show()
-
-
-def display_image(np_rgb, text=None, scale_up=False):
- """
- Display an image with optional text above image.
-
- Args:
- np_rgb: RGB image tile as a NumPy array
- text: Optional text to display above image
- scale_up: If True, scale up image to display by slide.SCALE_FACTOR
- """
- if scale_up:
- np_rgb = np.repeat(np_rgb, slide.SCALE_FACTOR, axis=1)
- np_rgb = np.repeat(np_rgb, slide.SCALE_FACTOR, axis=0)
-
- img_r, img_c, img_ch = np_rgb.shape
- if text is not None:
- np_t = np_text(text)
- t_r, t_c, _ = np_t.shape
- t_i_c = max(t_c, img_c)
- t_i_r = t_r + img_r
- t_i = np.zeros([t_i_r, t_i_c, img_ch], dtype=np.uint8)
- t_i.fill(255)
- t_i[0:t_r, 0:t_c] = np_t
- t_i[t_r:t_r + img_r, 0:img_c] = np_rgb
- np_rgb = t_i
-
- pil_img = util.np_to_pil(np_rgb)
- pil_img.show()
-
-
-def display_image_with_hsv_histograms(np_rgb, text=None, scale_up=False):
- """
- Display an image with its corresponding HSV hue, saturation, and value histograms.
-
- Args:
- np_rgb: RGB image tile as a NumPy array
- text: Optional text to display above image
- scale_up: If True, scale up image to display by slide.SCALE_FACTOR
- """
- hsv = filter.filter_rgb_to_hsv(np_rgb)
- np_h = np_hsv_hue_histogram(filter.filter_hsv_to_h(hsv))
- np_s = np_hsv_saturation_histogram(filter.filter_hsv_to_s(hsv))
- np_v = np_hsv_value_histogram(filter.filter_hsv_to_v(hsv))
- h_r, h_c, _ = np_h.shape
- s_r, s_c, _ = np_s.shape
- v_r, v_c, _ = np_v.shape
-
- if scale_up:
- np_rgb = np.repeat(np_rgb, slide.SCALE_FACTOR, axis=1)
- np_rgb = np.repeat(np_rgb, slide.SCALE_FACTOR, axis=0)
-
- img_r, img_c, img_ch = np_rgb.shape
- if text is not None:
- np_t = np_text(text)
- t_r, t_c, _ = np_t.shape
- t_i_c = max(t_c, img_c)
- t_i_r = t_r + img_r
- t_i = np.zeros([t_i_r, t_i_c, img_ch], dtype=np.uint8)
- t_i.fill(255)
- t_i[0:t_r, 0:t_c] = np_t
- t_i[t_r:t_r + img_r, 0:img_c] = np_rgb
- np_rgb = t_i # for simplicity assign title+image to image
- img_r, img_c, img_ch = np_rgb.shape
-
- hists_c = max(h_c, s_c, v_c)
- hists_r = h_r + s_r + v_r
- hists = np.zeros([hists_r, hists_c, img_ch], dtype=np.uint8)
-
- hists[0:h_r, 0:h_c] = np_h
- hists[h_r:h_r + s_r, 0:s_c] = np_s
- hists[h_r + s_r:h_r + s_r + v_r, 0:v_c] = np_v
-
- r = max(img_r, hists_r)
- c = img_c + hists_c
- combo = np.zeros([r, c, img_ch], dtype=np.uint8)
- combo.fill(255)
- combo[0:img_r, 0:img_c] = np_rgb
- combo[0:hists_r, img_c:c] = hists
- pil_combo = util.np_to_pil(combo)
- pil_combo.show()
-
-
-def display_image_with_rgb_histograms(np_rgb, text=None, scale_up=False):
- """
- Display an image with its corresponding RGB histograms.
-
- Args:
- np_rgb: RGB image tile as a NumPy array
- text: Optional text to display above image
- scale_up: If True, scale up image to display by slide.SCALE_FACTOR
- """
- np_r = np_rgb_r_histogram(np_rgb)
- np_g = np_rgb_g_histogram(np_rgb)
- np_b = np_rgb_b_histogram(np_rgb)
- r_r, r_c, _ = np_r.shape
- g_r, g_c, _ = np_g.shape
- b_r, b_c, _ = np_b.shape
-
- if scale_up:
- np_rgb = np.repeat(np_rgb, slide.SCALE_FACTOR, axis=1)
- np_rgb = np.repeat(np_rgb, slide.SCALE_FACTOR, axis=0)
-
- img_r, img_c, img_ch = np_rgb.shape
- if text is not None:
- np_t = np_text(text)
- t_r, t_c, _ = np_t.shape
- t_i_c = max(t_c, img_c)
- t_i_r = t_r + img_r
- t_i = np.zeros([t_i_r, t_i_c, img_ch], dtype=np.uint8)
- t_i.fill(255)
- t_i[0:t_r, 0:t_c] = np_t
- t_i[t_r:t_r + img_r, 0:img_c] = np_rgb
- np_rgb = t_i # for simplicity assign title+image to image
- img_r, img_c, img_ch = np_rgb.shape
-
- hists_c = max(r_c, g_c, b_c)
- hists_r = r_r + g_r + b_r
- hists = np.zeros([hists_r, hists_c, img_ch], dtype=np.uint8)
-
- hists[0:r_r, 0:r_c] = np_r
- hists[r_r:r_r + g_r, 0:g_c] = np_g
- hists[r_r + g_r:r_r + g_r + b_r, 0:b_c] = np_b
-
- r = max(img_r, hists_r)
- c = img_c + hists_c
- combo = np.zeros([r, c, img_ch], dtype=np.uint8)
- combo.fill(255)
- combo[0:img_r, 0:img_c] = np_rgb
- combo[0:hists_r, img_c:c] = hists
- pil_combo = util.np_to_pil(combo)
- pil_combo.show()
-
-
-def pil_text(text, w_border=TILE_TEXT_W_BORDER, h_border=TILE_TEXT_H_BORDER, font_path=FONT_PATH,
- font_size=TILE_TEXT_SIZE, text_color=TILE_TEXT_COLOR, background=TILE_TEXT_BACKGROUND_COLOR):
- """
- Obtain a PIL image representation of text.
-
- Args:
- text: The text to convert to an image.
- w_border: Tile text width border (left and right).
- h_border: Tile text height border (top and bottom).
- font_path: Path to font.
- font_size: Size of font.
- text_color: Tile text color.
- background: Tile background color.
-
- Returns:
- PIL image representing the text.
- """
-
- font = ImageFont.truetype(font_path, font_size)
- x, y = ImageDraw.Draw(Image.new("RGB", (1, 1), background)).textsize(text, font)
- image = Image.new("RGB", (x + 2 * w_border, y + 2 * h_border), background)
- draw = ImageDraw.Draw(image)
- draw.text((w_border, h_border), text, text_color, font=font)
- return image
-
-
-def np_text(text, w_border=TILE_TEXT_W_BORDER, h_border=TILE_TEXT_H_BORDER, font_path=FONT_PATH,
- font_size=TILE_TEXT_SIZE, text_color=TILE_TEXT_COLOR, background=TILE_TEXT_BACKGROUND_COLOR):
- """
- Obtain a NumPy array image representation of text.
-
- Args:
- text: The text to convert to an image.
- w_border: Tile text width border (left and right).
- h_border: Tile text height border (top and bottom).
- font_path: Path to font.
- font_size: Size of font.
- text_color: Tile text color.
- background: Tile background color.
-
- Returns:
- NumPy array representing the text.
- """
- pil_img = pil_text(text, w_border, h_border, font_path, font_size,
- text_color, background)
- np_img = util.pil_to_np_rgb(pil_img)
- return np_img
-
-
-def display_tile(tile, rgb_histograms=True, hsv_histograms=True):
- """
- Display a tile with its corresponding RGB and HSV histograms.
-
- Args:
- tile: The Tile object.
- rgb_histograms: If True, display RGB histograms.
- hsv_histograms: If True, display HSV histograms.
- """
-
- text = "S%03d R%03d C%03d\n" % (tile.slide_num, tile.r, tile.c)
- text += "Score:%4.2f Tissue:%5.2f%% CF:%2.0f SVF:%4.2f QF:%4.2f\n" % (
- tile.score, tile.tissue_percentage, tile.color_factor, tile.s_and_v_factor, tile.quantity_factor)
- text += "Rank #%d of %d" % (tile.rank, tile.tile_summary.num_tiles())
-
- np_scaled_tile = tile.get_np_scaled_tile()
- if np_scaled_tile is not None:
- small_text = text + "\n \nSmall Tile (%d x %d)" % (np_scaled_tile.shape[1], np_scaled_tile.shape[0])
- if rgb_histograms and hsv_histograms:
- display_image_with_rgb_and_hsv_histograms(np_scaled_tile, small_text, scale_up=True)
- elif rgb_histograms:
- display_image_with_rgb_histograms(np_scaled_tile, small_text, scale_up=True)
- elif hsv_histograms:
- display_image_with_hsv_histograms(np_scaled_tile, small_text, scale_up=True)
- else:
- display_image(np_scaled_tile, small_text, scale_up=True)
-
- np_tile = tile.get_np_tile()
- text += " based on small tile\n \nLarge Tile (%d x %d)" % (np_tile.shape[1], np_tile.shape[0])
- if rgb_histograms and hsv_histograms:
- display_image_with_rgb_and_hsv_histograms(np_tile, text)
- elif rgb_histograms:
- display_image_with_rgb_histograms(np_tile, text)
- elif hsv_histograms:
- display_image_with_hsv_histograms(np_tile, text)
- else:
- display_image(np_tile, text)
-
-
-def display_image_with_rgb_and_hsv_histograms(np_rgb, text=None, scale_up=False):
- """
- Display a tile with its corresponding RGB and HSV histograms.
-
- Args:
- np_rgb: RGB image tile as a NumPy array
- text: Optional text to display above image
- scale_up: If True, scale up image to display by slide.SCALE_FACTOR
- """
- hsv = filter.filter_rgb_to_hsv(np_rgb)
- np_r = np_rgb_r_histogram(np_rgb)
- np_g = np_rgb_g_histogram(np_rgb)
- np_b = np_rgb_b_histogram(np_rgb)
- np_h = np_hsv_hue_histogram(filter.filter_hsv_to_h(hsv))
- np_s = np_hsv_saturation_histogram(filter.filter_hsv_to_s(hsv))
- np_v = np_hsv_value_histogram(filter.filter_hsv_to_v(hsv))
-
- r_r, r_c, _ = np_r.shape
- g_r, g_c, _ = np_g.shape
- b_r, b_c, _ = np_b.shape
- h_r, h_c, _ = np_h.shape
- s_r, s_c, _ = np_s.shape
- v_r, v_c, _ = np_v.shape
-
- if scale_up:
- np_rgb = np.repeat(np_rgb, slide.SCALE_FACTOR, axis=1)
- np_rgb = np.repeat(np_rgb, slide.SCALE_FACTOR, axis=0)
-
- img_r, img_c, img_ch = np_rgb.shape
- if text is not None:
- np_t = np_text(text)
- t_r, t_c, _ = np_t.shape
- t_i_c = max(t_c, img_c)
- t_i_r = t_r + img_r
- t_i = np.zeros([t_i_r, t_i_c, img_ch], dtype=np.uint8)
- t_i.fill(255)
- t_i[0:t_r, 0:t_c] = np_t
- t_i[t_r:t_r + img_r, 0:img_c] = np_rgb
- np_rgb = t_i # for simplicity assign title+image to image
- img_r, img_c, img_ch = np_rgb.shape
-
- rgb_hists_c = max(r_c, g_c, b_c)
- rgb_hists_r = r_r + g_r + b_r
- rgb_hists = np.zeros([rgb_hists_r, rgb_hists_c, img_ch], dtype=np.uint8)
- rgb_hists[0:r_r, 0:r_c] = np_r
- rgb_hists[r_r:r_r + g_r, 0:g_c] = np_g
- rgb_hists[r_r + g_r:r_r + g_r + b_r, 0:b_c] = np_b
-
- hsv_hists_c = max(h_c, s_c, v_c)
- hsv_hists_r = h_r + s_r + v_r
- hsv_hists = np.zeros([hsv_hists_r, hsv_hists_c, img_ch], dtype=np.uint8)
- hsv_hists[0:h_r, 0:h_c] = np_h
- hsv_hists[h_r:h_r + s_r, 0:s_c] = np_s
- hsv_hists[h_r + s_r:h_r + s_r + v_r, 0:v_c] = np_v
-
- r = max(img_r, rgb_hists_r, hsv_hists_r)
- c = img_c + rgb_hists_c + hsv_hists_c
- combo = np.zeros([r, c, img_ch], dtype=np.uint8)
- combo.fill(255)
- combo[0:img_r, 0:img_c] = np_rgb
- combo[0:rgb_hists_r, img_c:img_c + rgb_hists_c] = rgb_hists
- combo[0:hsv_hists_r, img_c + rgb_hists_c:c] = hsv_hists
- pil_combo = util.np_to_pil(combo)
- pil_combo.show()
-
-
-def rgb_to_hues(rgb):
- """
- Convert RGB NumPy array to 1-dimensional array of hue values (HSV H values in degrees).
-
- Args:
- rgb: RGB image as a NumPy array
-
- Returns:
- 1-dimensional array of hue values in degrees
- """
- hsv = filter.filter_rgb_to_hsv(rgb, display_np_info=False)
- h = filter.filter_hsv_to_h(hsv, display_np_info=False)
- return h
-
-
-def hsv_saturation_and_value_factor(rgb):
- """
- Function to reduce scores of tiles with narrow HSV saturations and values since saturation and value standard
- deviations should be relatively broad if the tile contains significant tissue.
-
- Example of a blurred tile that should not be ranked as a top tile:
- ../data/tiles_png/006/TUPAC-TR-006-tile-r58-c3-x2048-y58369-w1024-h1024.png
-
- Args:
- rgb: RGB image as a NumPy array
-
- Returns:
- Saturation and value factor, where 1 is no effect and less than 1 means the standard deviations of saturation and
- value are relatively small.
- """
- hsv = filter.filter_rgb_to_hsv(rgb, display_np_info=False)
- s = filter.filter_hsv_to_s(hsv)
- v = filter.filter_hsv_to_v(hsv)
- s_std = np.std(s)
- v_std = np.std(v)
- if s_std < 0.05 and v_std < 0.05:
- factor = 0.4
- elif s_std < 0.05:
- factor = 0.7
- elif v_std < 0.05:
- factor = 0.7
- else:
- factor = 1
-
- factor = factor ** 2
- return factor
-
-
-def hsv_purple_deviation(hsv_hues):
- """
- Obtain the deviation from the HSV hue for purple.
-
- Args:
- hsv_hues: NumPy array of HSV hue values.
-
- Returns:
- The HSV purple deviation.
- """
- purple_deviation = np.sqrt(np.mean(np.abs(hsv_hues - HSV_PURPLE) ** 2))
- return purple_deviation
-
-
-def hsv_pink_deviation(hsv_hues):
- """
- Obtain the deviation from the HSV hue for pink.
-
- Args:
- hsv_hues: NumPy array of HSV hue values.
-
- Returns:
- The HSV pink deviation.
- """
- pink_deviation = np.sqrt(np.mean(np.abs(hsv_hues - HSV_PINK) ** 2))
- return pink_deviation
-
-
-def hsv_purple_pink_factor(rgb):
- """
- Compute scoring factor based on purple and pink HSV hue deviations and degree to which a narrowed hue color range
- average is purple versus pink.
-
- Args:
- rgb: Image an NumPy array.
-
- Returns:
- Factor that favors purple (hematoxylin stained) tissue over pink (eosin stained) tissue.
- """
- hues = rgb_to_hues(rgb)
- hues = hues[hues >= 260] # exclude hues under 260
- hues = hues[hues <= 340] # exclude hues over 340
- if len(hues) == 0:
- return 0 # if no hues between 260 and 340, then not purple or pink
- pu_dev = hsv_purple_deviation(hues)
- pi_dev = hsv_pink_deviation(hues)
- avg_factor = (340 - np.average(hues)) ** 2
-
- if pu_dev == 0: # avoid divide by zero if tile has no tissue
- return 0
-
- factor = pi_dev / pu_dev * avg_factor
- return factor
-
-
-def hsv_purple_vs_pink_average_factor(rgb, tissue_percentage):
- """
- Function to favor purple (hematoxylin) over pink (eosin) staining based on the distance of the HSV hue average
- from purple and pink.
-
- Args:
- rgb: Image as RGB NumPy array
- tissue_percentage: Amount of tissue on the tile
-
- Returns:
- Factor, where >1 to boost purple slide scores, <1 to reduce pink slide scores, or 1 no effect.
- """
-
- factor = 1
- # only applies to slides with a high quantity of tissue
- if tissue_percentage < TISSUE_HIGH_THRESH:
- return factor
-
- hues = rgb_to_hues(rgb)
- hues = hues[hues >= 200] # Remove hues under 200
- if len(hues) == 0:
- return factor
- avg = np.average(hues)
- # pil_hue_histogram(hues).show()
-
- pu = HSV_PURPLE - avg
- pi = HSV_PINK - avg
- pupi = pu + pi
- # print("Av: %4d, Pu: %4d, Pi: %4d, PuPi: %4d" % (avg, pu, pi, pupi))
- # Av: 250, Pu: 20, Pi: 80, PuPi: 100
- # Av: 260, Pu: 10, Pi: 70, PuPi: 80
- # Av: 270, Pu: 0, Pi: 60, PuPi: 60 ** PURPLE
- # Av: 280, Pu: -10, Pi: 50, PuPi: 40
- # Av: 290, Pu: -20, Pi: 40, PuPi: 20
- # Av: 300, Pu: -30, Pi: 30, PuPi: 0
- # Av: 310, Pu: -40, Pi: 20, PuPi: -20
- # Av: 320, Pu: -50, Pi: 10, PuPi: -40
- # Av: 330, Pu: -60, Pi: 0, PuPi: -60 ** PINK
- # Av: 340, Pu: -70, Pi: -10, PuPi: -80
- # Av: 350, Pu: -80, Pi: -20, PuPi: -100
-
- if pupi > 30:
- factor *= 1.2
- if pupi < -30:
- factor *= .8
- if pupi > 0:
- factor *= 1.2
- if pupi > 50:
- factor *= 1.2
- if pupi < -60:
- factor *= .8
-
- return factor
-
-
-class TileSummary:
- """
- Class for tile summary information.
- """
-
- slide_num = None
- orig_w = None
- orig_h = None
- orig_tile_w = None
- orig_tile_h = None
- scale_factor = slide.SCALE_FACTOR
- scaled_w = None
- scaled_h = None
- scaled_tile_w = None
- scaled_tile_h = None
- mask_percentage = None
- num_row_tiles = None
- num_col_tiles = None
-
- count = 0
- high = 0
- medium = 0
- low = 0
- none = 0
-
- def __init__(self, slide_num, orig_w, orig_h, orig_tile_w, orig_tile_h, scaled_w, scaled_h, scaled_tile_w,
- scaled_tile_h, tissue_percentage, num_col_tiles, num_row_tiles):
- self.slide_num = slide_num
- self.orig_w = orig_w
- self.orig_h = orig_h
- self.orig_tile_w = orig_tile_w
- self.orig_tile_h = orig_tile_h
- self.scaled_w = scaled_w
- self.scaled_h = scaled_h
- self.scaled_tile_w = scaled_tile_w
- self.scaled_tile_h = scaled_tile_h
- self.tissue_percentage = tissue_percentage
- self.num_col_tiles = num_col_tiles
- self.num_row_tiles = num_row_tiles
- self.tiles = []
-
- def __str__(self):
- return summary_title(self) + "\n" + summary_stats(self)
-
- def mask_percentage(self):
- """
- Obtain the percentage of the slide that is masked.
-
- Returns:
- The amount of the slide that is masked as a percentage.
- """
- return 100 - self.tissue_percentage
-
- def num_tiles(self):
- """
- Retrieve the total number of tiles.
-
- Returns:
- The total number of tiles (number of rows * number of columns).
- """
- return self.num_row_tiles * self.num_col_tiles
-
- def tiles_by_tissue_percentage(self):
- """
- Retrieve the tiles ranked by tissue percentage.
-
- Returns:
- List of the tiles ranked by tissue percentage.
- """
- sorted_list = sorted(self.tiles, key=lambda t: t.tissue_percentage, reverse=True)
- return sorted_list
-
- def tiles_by_score(self):
- """
- Retrieve the tiles ranked by score.
-
- Returns:
- List of the tiles ranked by score.
- """
- sorted_list = sorted(self.tiles, key=lambda t: t.score, reverse=True)
- return sorted_list
-
- def top_tiles(self):
- """
- Retrieve the top-scoring tiles.
-
- Returns:
- List of the top-scoring tiles.
- """
- sorted_tiles = self.tiles_by_score()
- top_tiles = sorted_tiles[:NUM_TOP_TILES]
- return top_tiles
-
- def get_tile(self, row, col):
- """
- Retrieve tile by row and column.
-
- Args:
- row: The row
- col: The column
-
- Returns:
- Corresponding Tile object.
- """
- tile_index = (row - 1) * self.num_col_tiles + (col - 1)
- tile = self.tiles[tile_index]
- return tile
-
- def display_summaries(self):
- """
- Display summary images.
- """
- summary_and_tiles(self.slide_num, display=True, save_summary=False, save_data=False, save_top_tiles=False)
-
-
-class Tile:
- """
- Class for information about a tile.
- """
-
- def __init__(self, tile_summary, slide_num, np_scaled_tile, tile_num, r, c, r_s, r_e, c_s, c_e, o_r_s, o_r_e, o_c_s,
- o_c_e, t_p, color_factor, s_and_v_factor, quantity_factor, score):
- self.tile_summary = tile_summary
- self.slide_num = slide_num
- self.np_scaled_tile = np_scaled_tile
- self.tile_num = tile_num
- self.r = r
- self.c = c
- self.r_s = r_s
- self.r_e = r_e
- self.c_s = c_s
- self.c_e = c_e
- self.o_r_s = o_r_s
- self.o_r_e = o_r_e
- self.o_c_s = o_c_s
- self.o_c_e = o_c_e
- self.tissue_percentage = t_p
- self.color_factor = color_factor
- self.s_and_v_factor = s_and_v_factor
- self.quantity_factor = quantity_factor
- self.score = score
-
- def __str__(self):
- return "[Tile #%d, Row #%d, Column #%d, Tissue %4.2f%%, Score %0.4f]" % (
- self.tile_num, self.r, self.c, self.tissue_percentage, self.score)
-
- def __repr__(self):
- return "\n" + self.__str__()
-
- def mask_percentage(self):
- return 100 - self.tissue_percentage
-
- def tissue_quantity(self):
- return tissue_quantity(self.tissue_percentage)
-
- def get_pil_tile(self):
- return tile_to_pil_tile(self)
-
- def get_np_tile(self):
- return tile_to_np_tile(self)
-
- def save_tile(self):
- save_display_tile(self, save=True, display=False)
-
- def display_tile(self):
- save_display_tile(self, save=False, display=True)
-
- def display_with_histograms(self):
- display_tile(self, rgb_histograms=True, hsv_histograms=True)
-
- def get_np_scaled_tile(self):
- return self.np_scaled_tile
-
- def get_pil_scaled_tile(self):
- return util.np_to_pil(self.np_scaled_tile)
-
-
-class TissueQuantity(Enum):
- NONE = 0
- LOW = 1
- MEDIUM = 2
- HIGH = 3
-
-
-def dynamic_tiles(slide_num, small_tile_in_tile=False):
- """
- Generate tile summary with top tiles using original WSI training slide without intermediate image files saved to
- file system.
-
- Args:
- slide_num: The slide number.
- small_tile_in_tile: If True, include the small NumPy image in the Tile objects.
-
- Returns:
- TileSummary object with list of top Tile objects. The actual tile images are not retrieved until the
- Tile get_tile() methods are called.
- """
- np_img, large_w, large_h, small_w, small_h = slide.slide_to_scaled_np_image(slide_num)
- filt_np_img = filter.apply_image_filters(np_img)
- tile_summary = score_tiles(slide_num, filt_np_img, (large_w, large_h, small_w, small_h), small_tile_in_tile)
- return tile_summary
-
-
-def dynamic_tile(slide_num, row, col, small_tile_in_tile=False):
- """
- Generate a single tile dynamically based on slide number, row, and column. If more than one tile needs to be
- retrieved dynamically, dynamic_tiles() should be used.
-
- Args:
- slide_num: The slide number.
- row: The row.
- col: The column.
- small_tile_in_tile: If True, include the small NumPy image in the Tile objects.
-
- Returns:
- Tile tile object.
- """
- tile_summary = dynamic_tiles(slide_num, small_tile_in_tile)
- tile = tile_summary.get_tile(row, col)
- return tile
-
-# if __name__ == "__main__":
- # tile = dynamic_tile(2, 29, 16, True)
- # tile.display_with_histograms()
-
- # singleprocess_filtered_images_to_tiles()
- # multiprocess_filtered_images_to_tiles()
diff --git a/docs/wsi-preprocessing-in-python/_layouts/default.html b/docs/wsi-preprocessing-in-python/_layouts/default.html
deleted file mode 100644
index cddd070..0000000
--- a/docs/wsi-preprocessing-in-python/_layouts/default.html
+++ /dev/null
@@ -1 +0,0 @@
-{{ content }}
diff --git a/docs/wsi-preprocessing-in-python/images/127-rgb-after-filters.png b/docs/wsi-preprocessing-in-python/images/127-rgb-after-filters.png
deleted file mode 100644
index 5ba0ddb..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/127-rgb-after-filters.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/127-rgb.png b/docs/wsi-preprocessing-in-python/images/127-rgb.png
deleted file mode 100644
index 76a2383..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/127-rgb.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/21-avoid-overmask-green-ch-2.png b/docs/wsi-preprocessing-in-python/images/21-avoid-overmask-green-ch-2.png
deleted file mode 100644
index 80c94b7..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/21-avoid-overmask-green-ch-2.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/21-avoid-overmask-green-ch-avoid-overmask-rem-small-obj.png b/docs/wsi-preprocessing-in-python/images/21-avoid-overmask-green-ch-avoid-overmask-rem-small-obj.png
deleted file mode 100644
index 8be2ba0..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/21-avoid-overmask-green-ch-avoid-overmask-rem-small-obj.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/21-avoid-overmask-green-ch-overmask-rem-small-obj.png b/docs/wsi-preprocessing-in-python/images/21-avoid-overmask-green-ch-overmask-rem-small-obj.png
deleted file mode 100644
index 9aa1ed8..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/21-avoid-overmask-green-ch-overmask-rem-small-obj.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/21-avoid-overmask-green-ch.png b/docs/wsi-preprocessing-in-python/images/21-avoid-overmask-green-ch.png
deleted file mode 100644
index 80c94b7..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/21-avoid-overmask-green-ch.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/21-overmask-green-ch-avoid-overmask-rem-small-obj.png b/docs/wsi-preprocessing-in-python/images/21-overmask-green-ch-avoid-overmask-rem-small-obj.png
deleted file mode 100644
index da51436..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/21-overmask-green-ch-avoid-overmask-rem-small-obj.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/21-overmask-green-ch-overmask-rem-small-obj.png b/docs/wsi-preprocessing-in-python/images/21-overmask-green-ch-overmask-rem-small-obj.png
deleted file mode 100644
index bb4c7cf..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/21-overmask-green-ch-overmask-rem-small-obj.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/21-overmask-green-ch.png b/docs/wsi-preprocessing-in-python/images/21-overmask-green-ch.png
deleted file mode 100644
index 5901473..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/21-overmask-green-ch.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/21-rgb.png b/docs/wsi-preprocessing-in-python/images/21-rgb.png
deleted file mode 100644
index ba06b04..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/21-rgb.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/337-001.png b/docs/wsi-preprocessing-in-python/images/337-001.png
deleted file mode 100644
index 7a86cfe..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/337-001.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/337-002.png b/docs/wsi-preprocessing-in-python/images/337-002.png
deleted file mode 100644
index 91c76e4..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/337-002.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/337-003.png b/docs/wsi-preprocessing-in-python/images/337-003.png
deleted file mode 100644
index 9c59bd6..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/337-003.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/337-004.png b/docs/wsi-preprocessing-in-python/images/337-004.png
deleted file mode 100644
index 0872986..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/337-004.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/337-005.png b/docs/wsi-preprocessing-in-python/images/337-005.png
deleted file mode 100644
index f725bf1..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/337-005.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/337-006.png b/docs/wsi-preprocessing-in-python/images/337-006.png
deleted file mode 100644
index a8ddfb2..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/337-006.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/337-007.png b/docs/wsi-preprocessing-in-python/images/337-007.png
deleted file mode 100644
index e08d3a8..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/337-007.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/337-008.png b/docs/wsi-preprocessing-in-python/images/337-008.png
deleted file mode 100644
index 0aeeca9..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/337-008.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/424-rgb.png b/docs/wsi-preprocessing-in-python/images/424-rgb.png
deleted file mode 100644
index 4ac79a1..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/424-rgb.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/498-rgb-after-filters.png b/docs/wsi-preprocessing-in-python/images/498-rgb-after-filters.png
deleted file mode 100644
index 6eebd1c..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/498-rgb-after-filters.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/498-rgb.png b/docs/wsi-preprocessing-in-python/images/498-rgb.png
deleted file mode 100644
index 84fe3f0..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/498-rgb.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/5-steps.png b/docs/wsi-preprocessing-in-python/images/5-steps.png
deleted file mode 100644
index c3c1f20..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/5-steps.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/TUPAC-TR-002-tile-r34-c34-x33793-y33799-w1024-h1024.png b/docs/wsi-preprocessing-in-python/images/TUPAC-TR-002-tile-r34-c34-x33793-y33799-w1024-h1024.png
deleted file mode 100644
index f21777a..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/TUPAC-TR-002-tile-r34-c34-x33793-y33799-w1024-h1024.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/TUPAC-TR-002-tile-r35-c37-x36865-y34823-w1024-h1024.png b/docs/wsi-preprocessing-in-python/images/TUPAC-TR-002-tile-r35-c37-x36865-y34823-w1024-h1024.png
deleted file mode 100644
index 5a9169c..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/TUPAC-TR-002-tile-r35-c37-x36865-y34823-w1024-h1024.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/adaptive-equalization.png b/docs/wsi-preprocessing-in-python/images/adaptive-equalization.png
deleted file mode 100644
index 12ef7dc..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/adaptive-equalization.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/basic-threshold.png b/docs/wsi-preprocessing-in-python/images/basic-threshold.png
deleted file mode 100644
index b38941a..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/basic-threshold.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/binary-closing-20.png b/docs/wsi-preprocessing-in-python/images/binary-closing-20.png
deleted file mode 100644
index bee169d..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/binary-closing-20.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/binary-closing-5.png b/docs/wsi-preprocessing-in-python/images/binary-closing-5.png
deleted file mode 100644
index cbc58b8..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/binary-closing-5.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/binary-dilation-20.png b/docs/wsi-preprocessing-in-python/images/binary-dilation-20.png
deleted file mode 100644
index b301b15..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/binary-dilation-20.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/binary-dilation-5.png b/docs/wsi-preprocessing-in-python/images/binary-dilation-5.png
deleted file mode 100644
index 30903c5..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/binary-dilation-5.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/binary-erosion-20.png b/docs/wsi-preprocessing-in-python/images/binary-erosion-20.png
deleted file mode 100644
index 499d33e..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/binary-erosion-20.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/binary-erosion-5.png b/docs/wsi-preprocessing-in-python/images/binary-erosion-5.png
deleted file mode 100644
index 7501409..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/binary-erosion-5.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/binary-erosion-no-grays.png b/docs/wsi-preprocessing-in-python/images/binary-erosion-no-grays.png
deleted file mode 100644
index a861ff3..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/binary-erosion-no-grays.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/binary-erosion-original.png b/docs/wsi-preprocessing-in-python/images/binary-erosion-original.png
deleted file mode 100644
index 9658135..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/binary-erosion-original.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/binary-opening-20.png b/docs/wsi-preprocessing-in-python/images/binary-opening-20.png
deleted file mode 100644
index d6db36f..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/binary-opening-20.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/binary-opening-5.png b/docs/wsi-preprocessing-in-python/images/binary-opening-5.png
deleted file mode 100644
index 173ed7b..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/binary-opening-5.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/blue-filter.png b/docs/wsi-preprocessing-in-python/images/blue-filter.png
deleted file mode 100644
index 93d8385..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/blue-filter.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/blue-original.png b/docs/wsi-preprocessing-in-python/images/blue-original.png
deleted file mode 100644
index c4a6e5e..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/blue-original.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/blue-pen-filter.png b/docs/wsi-preprocessing-in-python/images/blue-pen-filter.png
deleted file mode 100644
index 082488d..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/blue-pen-filter.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/blue-pen.png b/docs/wsi-preprocessing-in-python/images/blue-pen.png
deleted file mode 100644
index d25aec7..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/blue-pen.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/blue.png b/docs/wsi-preprocessing-in-python/images/blue.png
deleted file mode 100644
index a4569ff..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/blue.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/canny-original-cropped.png b/docs/wsi-preprocessing-in-python/images/canny-original-cropped.png
deleted file mode 100644
index d5a042d..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/canny-original-cropped.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/canny-original-with-inverse-mask.png b/docs/wsi-preprocessing-in-python/images/canny-original-with-inverse-mask.png
deleted file mode 100644
index 610af40..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/canny-original-with-inverse-mask.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/canny-original.png b/docs/wsi-preprocessing-in-python/images/canny-original.png
deleted file mode 100644
index 9033075..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/canny-original.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/canny.png b/docs/wsi-preprocessing-in-python/images/canny.png
deleted file mode 100644
index fa92c1c..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/canny.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/color-histograms-large-tile.png b/docs/wsi-preprocessing-in-python/images/color-histograms-large-tile.png
deleted file mode 100644
index 5b05cce..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/color-histograms-large-tile.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/color-histograms-small-tile.png b/docs/wsi-preprocessing-in-python/images/color-histograms-small-tile.png
deleted file mode 100644
index 7f5e78b..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/color-histograms-small-tile.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/combine-pen-filters-no-blue-pen.png b/docs/wsi-preprocessing-in-python/images/combine-pen-filters-no-blue-pen.png
deleted file mode 100644
index 3b6bf35..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/combine-pen-filters-no-blue-pen.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/combine-pen-filters-no-green-pen-no-blue-pen.png b/docs/wsi-preprocessing-in-python/images/combine-pen-filters-no-green-pen-no-blue-pen.png
deleted file mode 100644
index f8f8248..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/combine-pen-filters-no-green-pen-no-blue-pen.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/combine-pen-filters-no-green-pen.png b/docs/wsi-preprocessing-in-python/images/combine-pen-filters-no-green-pen.png
deleted file mode 100644
index a1b710e..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/combine-pen-filters-no-green-pen.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/combine-pen-filters-original-with-no-green-pen-no-blue-pen.png b/docs/wsi-preprocessing-in-python/images/combine-pen-filters-original-with-no-green-pen-no-blue-pen.png
deleted file mode 100644
index 653a593..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/combine-pen-filters-original-with-no-green-pen-no-blue-pen.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/combine-pen-filters-original.png b/docs/wsi-preprocessing-in-python/images/combine-pen-filters-original.png
deleted file mode 100644
index 4100395..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/combine-pen-filters-original.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/combine-pens-background-mask.png b/docs/wsi-preprocessing-in-python/images/combine-pens-background-mask.png
deleted file mode 100644
index 095d873..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/combine-pens-background-mask.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/combine-pens-background-original-with-inverse-mask.png b/docs/wsi-preprocessing-in-python/images/combine-pens-background-original-with-inverse-mask.png
deleted file mode 100644
index ef96f4c..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/combine-pens-background-original-with-inverse-mask.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/combine-pens-background-original-with-mask.png b/docs/wsi-preprocessing-in-python/images/combine-pens-background-original-with-mask.png
deleted file mode 100644
index d7dd065..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/combine-pens-background-original-with-mask.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/combine-pens-background-original.png b/docs/wsi-preprocessing-in-python/images/combine-pens-background-original.png
deleted file mode 100644
index 664ebee..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/combine-pens-background-original.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/complement.png b/docs/wsi-preprocessing-in-python/images/complement.png
deleted file mode 100644
index cbeb858..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/complement.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/contrast-stretching.png b/docs/wsi-preprocessing-in-python/images/contrast-stretching.png
deleted file mode 100644
index 4237717..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/contrast-stretching.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/display-image-with-text.png b/docs/wsi-preprocessing-in-python/images/display-image-with-text.png
deleted file mode 100644
index e1a22a7..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/display-image-with-text.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/distribution-of-svs-image-sizes.png b/docs/wsi-preprocessing-in-python/images/distribution-of-svs-image-sizes.png
deleted file mode 100644
index 93c444d..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/distribution-of-svs-image-sizes.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/entropy-grayscale.png b/docs/wsi-preprocessing-in-python/images/entropy-grayscale.png
deleted file mode 100644
index 2f03147..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/entropy-grayscale.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/entropy-original-entropy-mask.png b/docs/wsi-preprocessing-in-python/images/entropy-original-entropy-mask.png
deleted file mode 100644
index ac311dc..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/entropy-original-entropy-mask.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/entropy-original-inverse-entropy-mask.png b/docs/wsi-preprocessing-in-python/images/entropy-original-inverse-entropy-mask.png
deleted file mode 100644
index e68969e..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/entropy-original-inverse-entropy-mask.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/entropy-original.png b/docs/wsi-preprocessing-in-python/images/entropy-original.png
deleted file mode 100644
index 35ce056..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/entropy-original.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/entropy.png b/docs/wsi-preprocessing-in-python/images/entropy.png
deleted file mode 100644
index 4cd08cc..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/entropy.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/eosin-channel.png b/docs/wsi-preprocessing-in-python/images/eosin-channel.png
deleted file mode 100644
index fcf88f3..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/eosin-channel.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/fill-holes-remove-small-holes-100.png b/docs/wsi-preprocessing-in-python/images/fill-holes-remove-small-holes-100.png
deleted file mode 100644
index d783883..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/fill-holes-remove-small-holes-100.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/fill-holes-remove-small-holes-10000.png b/docs/wsi-preprocessing-in-python/images/fill-holes-remove-small-holes-10000.png
deleted file mode 100644
index 625bbe7..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/fill-holes-remove-small-holes-10000.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/fill-holes.png b/docs/wsi-preprocessing-in-python/images/fill-holes.png
deleted file mode 100644
index aea7cff..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/fill-holes.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/filter-example.png b/docs/wsi-preprocessing-in-python/images/filter-example.png
deleted file mode 100644
index 4cfe631..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/filter-example.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/filters-001-008.png b/docs/wsi-preprocessing-in-python/images/filters-001-008.png
deleted file mode 100644
index 9ed3c8b..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/filters-001-008.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/grays-filter.png b/docs/wsi-preprocessing-in-python/images/grays-filter.png
deleted file mode 100644
index 497ee72..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/grays-filter.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/grayscale.png b/docs/wsi-preprocessing-in-python/images/grayscale.png
deleted file mode 100644
index b8694e4..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/grayscale.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/green-channel-filter.png b/docs/wsi-preprocessing-in-python/images/green-channel-filter.png
deleted file mode 100644
index ef76ab7..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/green-channel-filter.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/green-filter.png b/docs/wsi-preprocessing-in-python/images/green-filter.png
deleted file mode 100644
index b5ec735..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/green-filter.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/green-original.png b/docs/wsi-preprocessing-in-python/images/green-original.png
deleted file mode 100644
index 60d675e..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/green-original.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/green-pen-filter.png b/docs/wsi-preprocessing-in-python/images/green-pen-filter.png
deleted file mode 100644
index 2224764..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/green-pen-filter.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/green-pen.png b/docs/wsi-preprocessing-in-python/images/green-pen.png
deleted file mode 100644
index 07745db..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/green-pen.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/green.png b/docs/wsi-preprocessing-in-python/images/green.png
deleted file mode 100644
index 4037fc5..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/green.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/hematoxylin-channel.png b/docs/wsi-preprocessing-in-python/images/hematoxylin-channel.png
deleted file mode 100644
index 51183f4..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/hematoxylin-channel.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/histogram-equalization.png b/docs/wsi-preprocessing-in-python/images/histogram-equalization.png
deleted file mode 100644
index 8e5ddc8..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/histogram-equalization.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/hsv-hue-histogram.png b/docs/wsi-preprocessing-in-python/images/hsv-hue-histogram.png
deleted file mode 100644
index 3bbc559..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/hsv-hue-histogram.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/hysteresis-threshold.png b/docs/wsi-preprocessing-in-python/images/hysteresis-threshold.png
deleted file mode 100644
index fde68e7..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/hysteresis-threshold.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/kmeans-original.png b/docs/wsi-preprocessing-in-python/images/kmeans-original.png
deleted file mode 100644
index c4dc401..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/kmeans-original.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/kmeans-segmentation-after-otsu.png b/docs/wsi-preprocessing-in-python/images/kmeans-segmentation-after-otsu.png
deleted file mode 100644
index cfb27e1..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/kmeans-segmentation-after-otsu.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/kmeans-segmentation.png b/docs/wsi-preprocessing-in-python/images/kmeans-segmentation.png
deleted file mode 100644
index 534ae18..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/kmeans-segmentation.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/not-blue-pen.png b/docs/wsi-preprocessing-in-python/images/not-blue-pen.png
deleted file mode 100644
index 5bf8e7b..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/not-blue-pen.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/not-blue.png b/docs/wsi-preprocessing-in-python/images/not-blue.png
deleted file mode 100644
index 5935c7e..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/not-blue.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/not-green-pen.png b/docs/wsi-preprocessing-in-python/images/not-green-pen.png
deleted file mode 100644
index 19307ee..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/not-green-pen.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/not-green.png b/docs/wsi-preprocessing-in-python/images/not-green.png
deleted file mode 100644
index c0f92c7..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/not-green.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/not-red-pen.png b/docs/wsi-preprocessing-in-python/images/not-red-pen.png
deleted file mode 100644
index dda9840..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/not-red-pen.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/not-red.png b/docs/wsi-preprocessing-in-python/images/not-red.png
deleted file mode 100644
index e8f9bc1..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/not-red.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/openslide-available-slides.png b/docs/wsi-preprocessing-in-python/images/openslide-available-slides.png
deleted file mode 100644
index d83a5ca..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/openslide-available-slides.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/openslide-whole-slide-image-zoomed.png b/docs/wsi-preprocessing-in-python/images/openslide-whole-slide-image-zoomed.png
deleted file mode 100644
index 1a9f2e6..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/openslide-whole-slide-image-zoomed.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/openslide-whole-slide-image.png b/docs/wsi-preprocessing-in-python/images/openslide-whole-slide-image.png
deleted file mode 100644
index 43738a8..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/openslide-whole-slide-image.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/optional-tile-labels.png b/docs/wsi-preprocessing-in-python/images/optional-tile-labels.png
deleted file mode 100644
index 3128257..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/optional-tile-labels.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/otsu-mask.png b/docs/wsi-preprocessing-in-python/images/otsu-mask.png
deleted file mode 100644
index ebbe237..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/otsu-mask.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/otsu-threshold.png b/docs/wsi-preprocessing-in-python/images/otsu-threshold.png
deleted file mode 100644
index f75614d..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/otsu-threshold.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/pink-and-purple-slide.png b/docs/wsi-preprocessing-in-python/images/pink-and-purple-slide.png
deleted file mode 100644
index 6a137d4..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/pink-and-purple-slide.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/purple-slide.png b/docs/wsi-preprocessing-in-python/images/purple-slide.png
deleted file mode 100644
index beb45b5..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/purple-slide.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/rag-thresh-1.png b/docs/wsi-preprocessing-in-python/images/rag-thresh-1.png
deleted file mode 100644
index 62af1e9..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/rag-thresh-1.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/rag-thresh-20.png b/docs/wsi-preprocessing-in-python/images/rag-thresh-20.png
deleted file mode 100644
index f2740ec..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/rag-thresh-20.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/rag-thresh-9.png b/docs/wsi-preprocessing-in-python/images/rag-thresh-9.png
deleted file mode 100644
index 86a9a51..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/rag-thresh-9.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/rag-thresh-original.png b/docs/wsi-preprocessing-in-python/images/rag-thresh-original.png
deleted file mode 100644
index fe2ade4..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/rag-thresh-original.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/red-filter.png b/docs/wsi-preprocessing-in-python/images/red-filter.png
deleted file mode 100644
index bd8b818..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/red-filter.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/red-pen-filter.png b/docs/wsi-preprocessing-in-python/images/red-pen-filter.png
deleted file mode 100644
index b887feb..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/red-pen-filter.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/red-pen-slides-filters.png b/docs/wsi-preprocessing-in-python/images/red-pen-slides-filters.png
deleted file mode 100644
index 7af55ed..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/red-pen-slides-filters.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/red-pen.png b/docs/wsi-preprocessing-in-python/images/red-pen.png
deleted file mode 100644
index 2572242..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/red-pen.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/red.png b/docs/wsi-preprocessing-in-python/images/red.png
deleted file mode 100644
index 3791756..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/red.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/remove-more-green-more-gray.png b/docs/wsi-preprocessing-in-python/images/remove-more-green-more-gray.png
deleted file mode 100644
index a558446..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/remove-more-green-more-gray.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/remove-small-holes-100.png b/docs/wsi-preprocessing-in-python/images/remove-small-holes-100.png
deleted file mode 100644
index 2678518..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/remove-small-holes-100.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/remove-small-holes-10000.png b/docs/wsi-preprocessing-in-python/images/remove-small-holes-10000.png
deleted file mode 100644
index 8bc03e8..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/remove-small-holes-10000.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/remove-small-objects-100.png b/docs/wsi-preprocessing-in-python/images/remove-small-objects-100.png
deleted file mode 100644
index be3f6af..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/remove-small-objects-100.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/remove-small-objects-10000.png b/docs/wsi-preprocessing-in-python/images/remove-small-objects-10000.png
deleted file mode 100644
index f6b4917..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/remove-small-objects-10000.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/scoring-formula.png b/docs/wsi-preprocessing-in-python/images/scoring-formula.png
deleted file mode 100644
index 1985382..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/scoring-formula.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/slide-2-rgb-hsv.png b/docs/wsi-preprocessing-in-python/images/slide-2-rgb-hsv.png
deleted file mode 100644
index 621c60a..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/slide-2-rgb-hsv.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/slide-2-row-25-col-30.png b/docs/wsi-preprocessing-in-python/images/slide-2-row-25-col-30.png
deleted file mode 100644
index ff14d34..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/slide-2-row-25-col-30.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/slide-2-row-25-col-31.png b/docs/wsi-preprocessing-in-python/images/slide-2-row-25-col-31.png
deleted file mode 100644
index 3c7955e..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/slide-2-row-25-col-31.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/slide-2-row-25-col-32.png b/docs/wsi-preprocessing-in-python/images/slide-2-row-25-col-32.png
deleted file mode 100644
index 4a55279..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/slide-2-row-25-col-32.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/slide-2-tile-tissue-heatmap-original.png b/docs/wsi-preprocessing-in-python/images/slide-2-tile-tissue-heatmap-original.png
deleted file mode 100644
index efd0df4..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/slide-2-tile-tissue-heatmap-original.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/slide-2-tile-tissue-heatmap.png b/docs/wsi-preprocessing-in-python/images/slide-2-tile-tissue-heatmap.png
deleted file mode 100644
index 8f62ece..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/slide-2-tile-tissue-heatmap.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/slide-2-tissue-percentage-tile-1000.png b/docs/wsi-preprocessing-in-python/images/slide-2-tissue-percentage-tile-1000.png
deleted file mode 100644
index d48b2af..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/slide-2-tissue-percentage-tile-1000.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/slide-2-tissue-percentage-tile-1500.png b/docs/wsi-preprocessing-in-python/images/slide-2-tissue-percentage-tile-1500.png
deleted file mode 100644
index 9c30610..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/slide-2-tissue-percentage-tile-1500.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/slide-2-top-tile-borders.png b/docs/wsi-preprocessing-in-python/images/slide-2-top-tile-borders.png
deleted file mode 100644
index 5aa35bc..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/slide-2-top-tile-borders.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/slide-2-top-tile-labels-borders.png b/docs/wsi-preprocessing-in-python/images/slide-2-top-tile-labels-borders.png
deleted file mode 100644
index c2b6b00..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/slide-2-top-tile-labels-borders.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/slide-2-top-tile-labels.png b/docs/wsi-preprocessing-in-python/images/slide-2-top-tile-labels.png
deleted file mode 100644
index 98874af..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/slide-2-top-tile-labels.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/slide-2-top-tiles-original.png b/docs/wsi-preprocessing-in-python/images/slide-2-top-tiles-original.png
deleted file mode 100644
index 2ddac94..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/slide-2-top-tiles-original.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/slide-2-top-tiles.png b/docs/wsi-preprocessing-in-python/images/slide-2-top-tiles.png
deleted file mode 100644
index b1d50ec..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/slide-2-top-tiles.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/slide-4-rgb.png b/docs/wsi-preprocessing-in-python/images/slide-4-rgb.png
deleted file mode 100644
index 89e8cc7..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/slide-4-rgb.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/slide-4-top-tile-1.png b/docs/wsi-preprocessing-in-python/images/slide-4-top-tile-1.png
deleted file mode 100644
index 2c6e3c2..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/slide-4-top-tile-1.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/slide-4-top-tile-2.png b/docs/wsi-preprocessing-in-python/images/slide-4-top-tile-2.png
deleted file mode 100644
index 42a91a5..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/slide-4-top-tile-2.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/slide-pen.png b/docs/wsi-preprocessing-in-python/images/slide-pen.png
deleted file mode 100644
index d8f1acd..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/slide-pen.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/slide-scan.png b/docs/wsi-preprocessing-in-python/images/slide-scan.png
deleted file mode 100644
index 5bf0dcf..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/slide-scan.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/svs-image-sizes.png b/docs/wsi-preprocessing-in-python/images/svs-image-sizes.png
deleted file mode 100644
index 6f1e83f..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/svs-image-sizes.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/tile-data.png b/docs/wsi-preprocessing-in-python/images/tile-data.png
deleted file mode 100644
index 18e4d2f..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/tile-data.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/tiles-page.png b/docs/wsi-preprocessing-in-python/images/tiles-page.png
deleted file mode 100644
index d037803..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/tiles-page.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/images/wsi-example.png b/docs/wsi-preprocessing-in-python/images/wsi-example.png
deleted file mode 100644
index 671697e..0000000
Binary files a/docs/wsi-preprocessing-in-python/images/wsi-example.png and /dev/null differ
diff --git a/docs/wsi-preprocessing-in-python/index.md b/docs/wsi-preprocessing-in-python/index.md
deleted file mode 100644
index 456829d..0000000
--- a/docs/wsi-preprocessing-in-python/index.md
+++ /dev/null
@@ -1,2775 +0,0 @@
----
-layout: default
----
-
-
-* Table of contents.
-{:toc}
-
-
-# Whole-Slide Image Preprocessing in Python
-
-
-## Project Introduction
-
-The primary goal of the [Tumor Proliferation Assessment Challenge 2016 (TUPAC16)](http://tupac.tue-image.nl/) is to
-develop algorithms to automatically predict breast cancer tumor proliferation scores. In this challenge, the training
-set consists of 500 whole-slide images which are scored (1, 2, or 3) by pathologists based on mitosis
-counts. A higher proliferation score indicates a worse prognosis since higher tumor proliferation rates are
-correlated with worse outcomes. The tissue samples are stained with hematoxylin and eosin (H&E).
-
-One of our first approaches to this challenge was to apply deep learning to breast cancer whole-slide images,
-following an approach similar to the process used by Ertosun and Rubin in
-[Automated Grading of Gliomas using Deep Learning in Digital Pathology Images: A modular approach with ensemble of
-convolutional neural networks](https://web.stanford.edu/group/rubinlab/pubs/2243353.pdf). One important part of the
-technique described by Ertosun and Rubin involves image preprocessing, where large whole-slide images are divided into
-tiles and only tiles that consist of at least 90% tissue are further analyzed. Tissue is determined by hysteresis
-thresholding on the grayscale image complement.
-
-The three TUPAC16 challenge tasks were won by Paeng et al, described in
-[A Unified Framework for Tumor Proliferation Score Prediction in Breast
-Histopathology](https://pdfs.semanticscholar.org/7d9b/ccac7a9a850cc84a980e5abeaeac2aef94e6.pdf). In their technique,
-identification of tissue regions in whole-slide images is done using Otsu thresholding, morphological operations, and
-binary dilation.
-
-Tissue identification in whole-slide images can be an important precursor to deep learning. Deep learning is
-computationally expensive and medical whole-slide images are enormous. Typically, a large portion of a slide isn't
-useful, such as the background, shadows, water, smudges, and pen marks. We can use preprocessing to
-rapidly reduce the quantity and increase the quality of the image data to be analyzed. This
-can lead to faster, more accurate model training.
-
-In this tutorial, we will take a look at whole-slide image processing and will describe various filters
-that can be used to increase the accuracy of tissue identification.
-After determining a useful set of filters for tissue segmentation, we'll divide slides into tiles and determine sets
-of tiles that typically represent good tissue samples.
-
-The solution should demonstrate high performance, flexibility, and accuracy. Filters should be easy to combine,
-chain, and modify. Tile scoring should be easy to modify for accurate tile selection. The solution should offer
-the ability to view filter, tile, and score results across large, unique datasets. The solution should also have
-the ability to work in a batch mode, where all image files and intermediary files are written to the file system,
-and in a dynamic mode, where high-scoring tissue tiles can be retrieved from the original WSI files without requiring
-any intermediary files.
-
-In summary, we will scale down whole-slide images, apply filters to these scaled-down images for tissue segmentation,
-break the slides into tiles, score the tiles, and then retrieve the top tiles based on their scores.
-
-| **5 Steps** |
-| -------------------- |
-|  |
-
-
-### Setup
-
-This project makes heavy use of Python3. Python is an ideal language for image processing.
-OpenSlide is utilized for reading WSI files. Pillow is used for basic image manipulation in Python.
-NumPy is used for fast, concise, powerful processing of images as NumPy arrays. Scikit-image is heavily used for
-a wide variety of image functionality, such as morphology, thresholding, and edge detection.
-
-Some quick setup steps on macOS follow.
-
-Install a package manager such as [Homebrew](https://brew.sh/).
-
- /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
-
-Install [Python3](https://www.python.org/).
-
- brew install python3
-
-Install [OpenSlide](http://openslide.org/).
-Note that OpenSlide is licensed under the [LGPL 2.1
-License](https://raw.githubusercontent.com/openslide/openslide/master/lgpl-2.1.txt).
-
- brew install openslide
-
-Next, we can install a variety of useful Python packages using the [pip3](https://pip.pypa.io/en/stable/)
-package manager. These packages include:
-[matplotlib](https://pypi.python.org/pypi/matplotlib/),
-[numpy](https://pypi.python.org/pypi/numpy),
-[openslide-python](https://pypi.python.org/pypi/openslide-python),
-[Pillow](https://pypi.org/project/Pillow/),
-[scikit-image](https://pypi.python.org/pypi/scikit-image),
-[scikit-learn](https://pypi.python.org/pypi/scikit-learn),
-and [scipy](https://pypi.python.org/pypi/scipy).
-
- pip3 install -U matplotlib numpy openslide-python Pillow scikit-image scikit-learn scipy
-
-We utilize scikit-image filters (hysteresis thresholding) in this tutorial that are not present in the
-latest released version of scikit-image at the time of this writing (0.13.1). We can install scikit-image
-from source, as described in the README at [https://github.com/scikit-image/scikit-image](https://github.com/scikit-image/scikit-image).
-
- git clone https://github.com/scikit-image/scikit-image.git
- cd scikit-image
- pip3 install -r requirements.txt
- pip3 install .
-
-
-### Whole Slide Imaging Background
-
-A whole-slide image is a digital representation of a microscopic slide, typically at a very high level of magnification
-such as 20x or 40x. As a result of this high magnification, whole slide images are typically very large in size.
-The maximum file size for a single whole-slide image in our training dataset was 3.4 GB, with an average over 1 GB.
-
-| **WSI Example Slide** |
-| -------------------- |
-|  |
-
-
-A whole-slide image is created by a microscope that scans a slide and combines smaller images into a large image.
-Techniques include combining scanned square tiles into a whole-slide image and combining scanned strips
-into a resulting whole-slide image. Occasionally, the smaller constituent images can be
-visually discerned, as in the shaded area at the top of the slide seen below.
-
-| **Combining Smaller Images into a Whole-Slide Image** |
-| -------------------- |
-|  |
-
-
-A fairly unusual feature of whole-slide images is the very large image size.
-For our training dataset of 500 images, the width varied from 19,920 pixels to 198,220 pixels,
-with an average of 101,688 pixels. The height varied from 13,347 pixels to 256,256 pixels,
-with an average of 73,154 pixels. The image total pixel sizes varied from
-369,356,640 to 35,621,634,048 pixels, with an average of
-7,670,709,628 pixels. The 500 training images take up a total of 525 GB of storage space.
-
-| **Training Image Sizes** |
-| -------------------- |
-|  |
-
-
-Here we see a histogram distribution of the training image sizes in megapixels.
-
-| **Distribution of Images Based on Number of Pixels** |
-| -------------------- |
-|  |
-
-
-The [OpenSlide](http://openslide.org/) project can be used to read a variety of whole-slide
-image formats, including the [Aperio *.svs slide format](http://openslide.org/formats/aperio/)
-of our training image set. This is a pyramidal, tiled format, where the massive slide is composed of
-a large number of constituent tiles.
-
-To use the OpenSlide Python interface to view whole slide images, we can clone the
-[OpenSlide Python interface from GitHub](https://github.com/openslide/openslide-python)
-and utilize the included DeepZoom `deepzoom_multiserver.py` script.
-
- git clone https://github.com/openslide/openslide-python.git
- cd openslide-python/examples/deepzoom
- python3 deepzoom_multiserver.py -Q 100 WSI_DIRECTORY
-
-The `deepzoom_multiserver.py` script starts a web interface on port 5000 and displays
-the image files at the specified file system location (the `WSI_DIRECTORY` value above,
-which could be a location such as `~/git/python-wsi-preprocessing/data/`). If image
-files exist in subdirectories, they will also be displayed in the list of available
-slides.
-
-If this viewing application is installed on a server that also hosts the whole-slide image repository, this
-offers a convenient mechanism for users to view the slides without requiring local storage space.
-
-| **OpenSlide Available Slides** |
-| -------------------- |
-|  |
-
-
-Here we can see the initial view of one of the whole-slide images viewed in a web browser.
-
-| **OpenSlide Whole Slide Image** |
-| -------------------- |
-|  |
-
-
-Using this web interface, the whole-slide image can be zoomed to the highest magnification, revealing fine details at
-the tile level. Zooming and scrolling operations make it relatively easy to visually peruse the whole slide image.
-
-| **OpenSlide Whole Slide Image Zoomed** |
-| -------------------- |
-|  |
-
-
-## Scale Down Images
-
-To develop a set of filters that can be applied to an entire set of large whole-slide images, two of the first issues
-we are confronted with are the size of the data and the format of the data. As mentioned, for our training dataset,
-the average `svs` file size is over 1 GB and we have 500 total images. Additionally, the `svs` format is a fairly unusual
-format which typically can't be visually displayed by default by common applications and operating systems. Therefore, we will
-develop some code to overcome these important issues. Using OpenSlide and Python, we'll convert the training dataset to
-smaller images in a common format, thus reformulating a big data problem as a small data problem. Before filtering
-at the entire slide level, we will shrink the width and height down by a factor of 32x, which means we can perform
-filtering on 1/1024th the image data. Converting 500 `svs` files to `png` files at 1/32 scale takes
-approximately 12 minutes on a typical MacBook Pro using the code described below.
-
-In the `wsi/slide.py` file, we have many functions that can be used in relation to the original `svs` images. Of
-particular importance are the following functions:
-
- open_slide()
- show_slide()
- slide_info(display_all_properties=True)
- slide_stats()
- training_slide_to_image()
- singleprocess_training_slides_to_images()
- multiprocess_training_slides_to_images()
-
-The `open_slide()` function uses OpenSlide to read in an `svs` file. The `show_slide()` function opens a WSI `svs` file
-and displays a scaled-down version of the slide to the screen. The `slide_info()` function displays metadata
-associated with all `svs` files. The `slide_stats()` function looks at all images and summarizes size information
-about the set of slides. It also generates a variety of charts for a visual representation of the slide statistics.
-The `training_slide_to_image()` function converts a single `svs` slide to a smaller image in a more common format such as
-`jpg` or `png`. The `singleprocess_training_slides_to_images()` function converts all `svs` slides to smaller images,
-and the `multiprocess_training_slides_to_images()` function uses multiple processes (1 process per core) to
-speed up the slide conversion process. For the last three functions, when an image is saved, a thumbnail image is also
-saved. By default, the thumbnail has a maximum height or width of 300 pixels and is `jpg` format.
-
-One of the first actions we can take to become more familiar with the training dataset is to have a look at the metadata
-associated with each image, which we can do with the `slide_info()` function. Here we can see a sample of this
-metadata for Slide #1:
-
-```
-Opening Slide #1: /Volumes/BigData/TUPAC/training_slides/TUPAC-TR-001.svs
-Level count: 5
-Level dimensions: ((130304, 247552), (32576, 61888), (8144, 15472), (2036, 3868), (1018, 1934))
-Level downsamples: (1.0, 4.0, 16.0, 64.0, 128.0)
-Dimensions: (130304, 247552)
-Objective power: 40
-Associated images:
- macro:
- thumbnail:
-Format: aperio
-Properties:
- Property: aperio.AppMag, value: 40
- Property: aperio.MPP, value: 0.16437
- Property: openslide.comment, value: Aperio Image Library v11.0.37
-130304x247552 (256x256) JPEG/RGB Q=40;Mirax Digital Slide|AppMag = 40|MPP = 0.16437
- Property: openslide.level-count, value: 5
- Property: openslide.level[0].downsample, value: 1
- Property: openslide.level[0].height, value: 247552
- Property: openslide.level[0].tile-height, value: 256
- Property: openslide.level[0].tile-width, value: 256
- Property: openslide.level[0].width, value: 130304
- Property: openslide.level[1].downsample, value: 4
- Property: openslide.level[1].height, value: 61888
- Property: openslide.level[1].tile-height, value: 256
- Property: openslide.level[1].tile-width, value: 256
- Property: openslide.level[1].width, value: 32576
- Property: openslide.level[2].downsample, value: 16
- Property: openslide.level[2].height, value: 15472
- Property: openslide.level[2].tile-height, value: 256
- Property: openslide.level[2].tile-width, value: 256
- Property: openslide.level[2].width, value: 8144
- Property: openslide.level[3].downsample, value: 64
- Property: openslide.level[3].height, value: 3868
- Property: openslide.level[3].tile-height, value: 256
- Property: openslide.level[3].tile-width, value: 256
- Property: openslide.level[3].width, value: 2036
- Property: openslide.level[4].downsample, value: 128
- Property: openslide.level[4].height, value: 1934
- Property: openslide.level[4].tile-height, value: 256
- Property: openslide.level[4].tile-width, value: 256
- Property: openslide.level[4].width, value: 1018
- Property: openslide.mpp-x, value: 0.16436999999999999
- Property: openslide.mpp-y, value: 0.16436999999999999
- Property: openslide.objective-power, value: 40
- Property: openslide.quickhash-1, value: 0e0631ade42ae3384aaa727ce2e36a8272fe67039c513e17dccfdd592f6040cb
- Property: openslide.vendor, value: aperio
- Property: tiff.ImageDescription, value: Aperio Image Library v11.0.37
-130304x247552 (256x256) JPEG/RGB Q=40;Mirax Digital Slide|AppMag = 40|MPP = 0.16437
- Property: tiff.ResolutionUnit, value: inch
-```
-
-The most important metadata for our purposes is that the slide has a width of 130,304 pixels and a height of
-247,552 pixels. Note that these values are displayed as width followed by height. For most of our image processing,
-we will be using NumPy arrays, where rows (height) are followed by columns (width).
-
-If we visually look over the metadata associated with other images in the training dataset, we see that the slides
-are not consistent in their various properties such as the number of levels contained in the `svs` files. The metadata
-implies that the dataset comes from a variety of sources. The variability in the slides, especially regarding
-issues such as H&E staining and pen marks on the slides, needs to be considered during our filter development.
-
-If we call the `slide_stats()` function, in addition to the charts, we obtain a table of pixel statistics, shown
-below.
-
-**Training Images Statistics**
-
-| Attribute | Size | Slide # |
-| ---------- | --------------------- | ------- |
-| Max width | 198,220 pixels | 10 |
-| Max height | 256,256 pixels | 387 |
-| Max size | 35,621,634,048 pixels | 387 |
-| Min width | 19,920 pixels | 112 |
-| Min height | 13,347 pixels | 108 |
-| Min size | 369,356,640 pixels | 112 |
-| Avg width | 101,688 pixels | |
-| Avg height | 73,154 pixels | |
-| Avg size | 7,670,709,629 pixels | |
-
-
-The `wsi/slide.py` file contains constants that can be used to control various image conversion settings. For example,
-the `SCALE_FACTOR` constant controls the factor by which the slides will be scaled down. Its default value is 32,
-meaning that the height and width will be scaled down by a factor of 32. This means that when we perform filtering,
-it will be performed on an image 1/1024th the size of the original high-resolution image.
-The `DEST_TRAIN_EXT` constant controls the output format. We will use the default format, `png`.
-
-Using macOS, the following conversion times using
-`singleprocess_training_slides_to_images()` and `multiprocess_training_slides_to_images()`
-on the 500 image training set were obtained:
-
-**Training Image Dataset Conversion Times**
-
-| Format | Processes | Time |
-| ------ | -------------- | ------ |
-| jpg | single process | 26m09s |
-| jpg | multi process | 10m21s |
-| png | single process | 42m59s |
-| png | multi process | 11m58s |
-
-
-After calling `multiprocess_training_slides_to_images()` using the `png` format, we have 500 scaled-down
-whole-slide images in lossless `png` format that we will examine in greater detail in relation to our filters.
-
-
-### Image Saving, Displaying, and Conversions
-
-In order to load, save, and display images, we use the Python [Pillow](https://pillow.readthedocs.io/en/4.3.x/)
-package. In particular, we make use of the Image module, which contains an Image class used to represent an image.
-The `wsi/slide.py` file contains an `open_image()` function to open an image stored in the file system.
-The `get_training_image_path()` function takes a slide number and returns the path to the corresponding training image
-file, meaning the scaled-down `png` file that we created by calling `multiprocess_training_slides_to_images()`.
-
-If we want to convert a single `svs` WSI file to a scaled-down `png` (without converting all `svs` files),
-open that `png` image file as a PIL Image, and display the image to the screen, we can do the following.
-
-```
-slide.training_slide_to_image(4)
-img_path = slide.get_training_image_path(4)
-img = slide.open_image(img_path)
-img.show()
-```
-
-To mathematically manipulate the images, we use NumPy arrays. The `wsi/util.py` file contains a
-`pil_to_np_rgb()` function that converts a PIL Image to a 3-dimensional NumPy array in RGB format. The first dimension
-represents the number of rows, the second dimension represents the number of columns, and the third dimension
-represents the channel (red, green, and blue).
-
-```
-rgb = util.pil_to_np_rgb(img)
-```
-
-For convenience, the `display_img()` function can be used to display a NumPy array image. Text can be added to
-the displayed image, which can be very useful when visually comparing the results of multiple filters.
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "RGB")
-```
-
-| **Display Image with Text** |
-| -------------------- |
-|  |
-
-
-When performing operations on NumPy arrays, functions in the `wsi/filter.py` file will often utilize the
-`util.np_info()` function to display information about the NumPy array and the amount of time required to perform the
-operation. For example, the above call to `pil_to_np_rgb()` internally calls `np_info()`:
-
-```
-t = Time()
-rgb = np.asarray(pil_img)
-np_info(rgb, "RGB", t.elapsed())
-return rgb
-```
-
-This call to `np_info()` generates console output such as the following:
-
-```
-RGB | Time: 0:00:00.162484 Type: uint8 Shape: (1385, 1810, 3)
-```
-
-We see that the PIL-to-NumPy array conversion took 0.16s. The type of the NumPy array is `uint8`, which means
-that each pixel is represented by a red, green, and blue unsigned integer value from 0 to 255. The image has a height of
-1385 pixels, a width of 1810 pixels, and three channels (representing red, green, and blue).
-
-We can obtain additional information about NumPy arrays by setting the `util.ADDITIONAL_NP_STATS` constant to `True`.
-If we rerun the above code with `ADDITIONAL_NP_STATS = True`, we see the following:
-
-```
-RGB | Time: 0:00:00.157696 Min: 2.00 Max: 255.00 Mean: 182.62 Binary: F Type: uint8 Shape: (1385, 1810, 3)
-```
-
-The minimum value is 2, the maximum value is 255, the mean value is 182.62, and binary is false, meaning that the
-image is not a binary image. A binary image is an image that consists of only two values (True or False, 1.0 or 0.0,
-255 or 0). Binary images are produced by actions such as thresholding.
-
-When interacting with NumPy image processing code, the information provided by `np_info()` can be extremely useful.
-For example, some functions return boolean NumPy arrays, other functions return float NumPy arrays, and other
-functions may return `uint8` NumPy arrays. Before performing actions on a NumPy array, it's usually necessary to know
-the data type of the array and the nature of the data in that array. For performance reasons, normally
-`ADDITIONAL_NP_STATS` should be set to `False`.
-
-The `wsi/util.py` file contains an `np_to_pil()` function that converts a NumPy array to a PIL Image.
-
-If we have a PIL Image, saving the image to the file system can be accomplished by calling the Image's `save()`
-function.
-
-```
-img.save(path)
-```
-
-
-## Apply Filters for Tissue Segmentation
-
-Next, we will investigate image filters and will determine a set of filters that can be utilized for effective
-tissue segmentation with our dataset.
-We will mask out non-tissue by setting non-tissue pixels to 0 for their red, green, and blue channels. For our
-particular dataset, our mask will AND together a green channel mask, a grays mask, a red pen mask, a green pen mask,
-and a blue pen mask. Following this, we will mask out small objects from the images.
-
-The filtering approach that we develop here has several benefits. All relevant filters are centralized in a single
-file, `wsi/filter.py`, for convenience. Filters return results in a standard format and the returned datatype can
-easily be changed (`boolean`, `uint8`, `float`). Critical filter debug information (shape, type, processing time, etc)
-is output to the console. Filter results can be easily viewed across the entire dataset or subsets of the dataset.
-Multiprocessing is used for increased performance. Additionally, filters can easily be combined, strung together,
-or otherwise modified.
-
-To filter our scaled-down 500 `png` image training set and generate 4,500 `png` filter preview images and 4,500 `jpg` thumbnails
-takes about 23m30s on my MacBook Pro. Filtering the 500 image training set without saving files takes approximately
-6 minutes.
-
-### Filters
-
-Let's take a look at several ways that our images can be filtered. Filters are represented by functions
-in the `wsi/filter.py` file and have `filter_` prepended to the function names.
-
-
-#### RGB to Grayscale
-
-A very common task in image processing is to convert an RGB image to a grayscale image. In this process, the three
-color channels are replaced by a single grayscale channel. The grayscale pixel value is computed by combining the
-red, green, and blue values in set percentages. The `filter_rgb_to_grayscale()` function multiplies the red value by
-21.25%, the green value by 71.54%, and the blue value by 7.21%, and these values are added together to obtain the
-grayscale pixel value.
-
-Although the PIL Image `convert("L")` function can also be used to convert an RGB image to a grayscale image, we
-will instead use the `filter_rgb_to_grayscale()` function, since having a reference to the RGB image as a NumPy array
-can often be very useful during image processing.
-
-Below, we'll open a slide as a PIL Image, convert this to an RGB NumPy array, and then convert this to a grayscale
-NumPy array.
-
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-grayscale = filter.filter_rgb_to_grayscale(rgb)
-util.display_img(grayscale, "Grayscale")
-```
-
-Here we see the displayed grayscale image.
-
-| **Grayscale Filter** |
-| -------------------- |
-|  |
-
-
-In the console, we see that the grayscale image is a two-dimensional NumPy array, since the 3 color channels have
-been combined into a single grayscale channel. The data type is `uint8` and pixels are represented by integer
-values between 0 and 255.
-
-
-```
-RGB | Time: 0:00:00.159974 Type: uint8 Shape: (1385, 1810, 3)
-Gray | Time: 0:00:00.101953 Type: uint8 Shape: (1385, 1810)
-```
-
-
-#### Complement
-
-In our whole-slide image training set, the slide backgrounds are illuminated by white light, which means that a `uint8`
-pixel in the background of a grayscale image is usually close to or equal to 255. However, conceptually and
-mathematically it is often useful to have background values close to or equal to 0. For example, this is useful in
-thresholding, where we might ask if a pixel value is above a particular threshold value. This can also be useful in
-masking out a background of 0 values from an image.
-
-The `filter_complement()` function inverts the values and thus the colors in the NumPy array representation of an image.
-Below, we use the `filter_complement()` function to invert the previously obtained grayscale image.
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-grayscale = filter.filter_rgb_to_grayscale(rgb)
-complement = filter.filter_complement(grayscale)
-util.display_img(complement, "Complement")
-```
-
-| **Complement Filter** |
-| -------------------- |
-|  |
-
-
-In the console output, we see that computing the complement is a very fast operation.
-
-```
-RGB | Time: 0:00:00.177398 Type: uint8 Shape: (1385, 1810, 3)
-Gray | Time: 0:00:00.105015 Type: uint8 Shape: (1385, 1810)
-Complement | Time: 0:00:00.001439 Type: uint8 Shape: (1385, 1810)
-```
-
-
-#### Thresholding
-
-
-##### Basic Threshold
-
-With basic thresholding, a binary image is generated, where each value in the resulting NumPy array indicates
-whether the corresponding pixel in the original image is above a particular threshold value. So, a
-pixel with a value of 160 with a threshold of 150 would generate a True (or 255, or 1.0), and a pixel with a value
-of 140 with a threshold of 150 would generate a False (or 0, or 0.0).
-
-Here, we apply a basic threshold with a threshold value of 100 to the grayscale complement of the original image.
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-grayscale = filter.filter_rgb_to_grayscale(rgb)
-complement = filter.filter_complement(grayscale)
-thresh = filter.filter_threshold(complement, threshold=100)
-util.display_img(thresh, "Threshold")
-```
-
-The result is a binary image where pixel values that were above 100 are shown in white and pixel values that were 100 or
-lower are shown in black.
-
-| **Basic Threshold Filter** |
-| -------------------- |
-|  |
-
-
-In the console output, we see that basic thresholding is a very fast operation.
-
-```
-RGB | Time: 0:00:00.164464 Type: uint8 Shape: (1385, 1810, 3)
-Gray | Time: 0:00:00.102431 Type: uint8 Shape: (1385, 1810)
-Complement | Time: 0:00:00.001397 Type: uint8 Shape: (1385, 1810)
-Threshold | Time: 0:00:00.001456 Type: bool Shape: (1385, 1810)
-```
-
-
-##### Hysteresis Threshold
-
-Hysteresis thresholding is a two-level threshold. The top-level threshold is treated in a similar fashion as basic
-thresholding. The bottom-level threshold must be exceeded and must be connected to the top-level threshold. This
-process typically results in much better thresholding than basic thresholding. Reasonable values for the top
-and bottom thresholds for images can be determined through experimentation.
-
-The `filter_hysteresis_threshold()` function uses default bottom and top threshold values of 50 and 100. The
-default array output type from this function is `uint8`. Since the output of this function is a binary image, the
-values in the output array will be either 255 or 0. The output type of this function can be specified using the
-`output_type` parameter. Note that when performing masking, it is typically more useful to have a NumPy array of
-boolean values.
-
-Here, we perform a hysteresis threshold on the complement of the grayscale image.
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-grayscale = filter.filter_rgb_to_grayscale(rgb)
-complement = filter.filter_complement(grayscale)
-hyst = filter.filter_hysteresis_threshold(complement)
-util.display_img(hyst, "Hysteresis Threshold")
-```
-
-In the displayed image, the result is a binary image. All pixel values are either white (255) or black (0).
-The red display text in the corner can be ignored since it is for informational purposes only and is not present when
-we save the images to the file system.
-
-Notice that the shadow area along the top edge of the slide makes it through the hysteresis threshold filter even
-though conceptually it is background and should not be treated as tissue.
-
-| **Hysteresis Threshold Filter** |
-| -------------------- |
-|  |
-
-
-Here we see the console output from our filter operations.
-
-```
-RGB | Time: 0:00:00.167947 Type: uint8 Shape: (1385, 1810, 3)
-Gray | Time: 0:00:00.109109 Type: uint8 Shape: (1385, 1810)
-Complement | Time: 0:00:00.001453 Type: uint8 Shape: (1385, 1810)
-Hysteresis Threshold | Time: 0:00:00.079292 Type: uint8 Shape: (1385, 1810)
-```
-
-
-##### Otsu Threshold
-
-Thresholding using Otsu's method is another popular thresholding technique. This technique was used in the image
-processing described in [A Unified Framework for Tumor Proliferation Score Prediction in Breast
-Histopathology](https://pdfs.semanticscholar.org/7d9b/ccac7a9a850cc84a980e5abeaeac2aef94e6.pdf). This technique is
-described in more detail at
-[https://en.wikipedia.org/wiki/Otsu%27s_method](https://en.wikipedia.org/wiki/Otsu%27s_method).
-
-Let's try Otsu's method on the complement image as we did when demonstrating hysteresis thresholding.
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-grayscale = filter.filter_rgb_to_grayscale(rgb)
-complement = filter.filter_complement(grayscale)
-otsu = filter.filter_otsu_threshold(complement)
-util.display_img(otsu, "Otsu Threshold")
-```
-
-
-In the resulting image, we see that Otsu's method generates roughly similar results as hysteresis thresholding.
-However, Otsu's method is less aggressive in terms of what it lets through for the tissue in the upper left
-area of the slide. The background shadow area at the top of the slide is passed through the
-filter in a similar fashion as hysteresis thresholding. Most of the slides in the training set do not have such a
-pronounced shadow area, but it would be nice to have an image processing solution that treats the shadow area as
-background.
-
-| **Otsu Threshold Filter** |
-| -------------------- |
-|  |
-
-
-In terms of performance, thresholding using Otsu's method is very fast, as we see in the console output.
-
-```
-RGB | Time: 0:00:00.166855 Type: uint8 Shape: (1385, 1810, 3)
-Gray | Time: 0:00:00.111960 Type: uint8 Shape: (1385, 1810)
-Complement | Time: 0:00:00.001746 Type: uint8 Shape: (1385, 1810)
-Otsu Threshold | Time: 0:00:00.014615 Type: uint8 Shape: (1385, 1810)
-```
-
-
-#### Contrast
-
-For an image, suppose we have a histogram of the number of pixels (intensity on y-axis) plotted against the range
-of possible pixel values (x-axis, 0 to 255). Contrast is a measure of the difference in intensities. An image with
-low contrast is typically dull and details are not clearly seen visually. An image with high contrast is typically
-sharp and details can clearly be discerned. Increasing the contrast in an image can be used to bring out various details
-in the image.
-
-
-##### Contrast Stretching
-
-One form of increasing the contrast in an image is contrast stretching. Suppose that all intensities in an image occur
-between 100 and 150 on a scale from 0 to 255. If we rescale the intensities so that 100 now corresponds to 0 and
-150 corresponds to 255 and we linearly rescale the intensities between these points, we have increased the contrast
-in the image and differences in detail can more clearly be seen. This is contrast stretching.
-
-As an example, here we perform contrast stretching with a low pixel value of 100 and a high pixel value of 200 on
-the complement of the grayscale image.
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-grayscale = filter.filter_rgb_to_grayscale(rgb)
-complement = filter.filter_complement(grayscale)
-contrast_stretch = filter.filter_contrast_stretch(complement, low=100, high=200)
-util.display_img(contrast_stretch, "Contrast Stretch")
-```
-
-This can be used to visually inspect details in the previous intensity range of 100 to 200, since the image filter has
-spread out this range across the full spectrum.
-
-
-| **Contrast Stretching Filter** |
-| -------------------- |
-|  |
-
-
-Here we see the console output from this set of filters.
-
-```
-RGB | Time: 0:00:00.171582 Type: uint8 Shape: (1385, 1810, 3)
-Gray | Time: 0:00:00.110818 Type: uint8 Shape: (1385, 1810)
-Complement | Time: 0:00:00.002410 Type: uint8 Shape: (1385, 1810)
-Contrast Stretch | Time: 0:00:00.058357 Type: uint8 Shape: (1385, 1810)
-```
-
-
-##### Histogram Equalization
-
-Histogram equalization is another technique that can be used to increase contrast in an image. However, unlike
-contrast stretching, which has a linear distribution of the resulting intensities, the histogram equalization
-transformation is based on probabilities and is non-linear. For more information about histogram equalization, please
-see [https://en.wikipedia.org/wiki/Histogram_equalization](https://en.wikipedia.org/wiki/Histogram_equalization).
-
-As an example, here we display the grayscale image. We increase contrast in the grayscale image using histogram
-equalization and display the resulting image.
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-grayscale = filter.filter_rgb_to_grayscale(rgb)
-util.display_img(grayscale, "Grayscale")
-hist_equ = filter.filter_histogram_equalization(grayscale)
-util.display_img(hist_equ, "Histogram Equalization")
-```
-
-Comparing the grayscale image and the image after histogram equalization, we see that contrast in the image has been
-increased.
-
-| **Grayscale Filter** | **Histogram Equalization Filter** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-Console output following histogram equalization is shown here.
-
-```
-RGB | Time: 0:00:00.175498 Type: uint8 Shape: (1385, 1810, 3)
-Gray | Time: 0:00:00.110181 Type: uint8 Shape: (1385, 1810)
-Hist Equalization | Time: 0:00:00.116568 Type: uint8 Shape: (1385, 1810)
-```
-
-
-##### Adaptive Equalization
-
-Rather than applying a single transformation to all pixels in an image, adaptive histogram equalization applies
-transformations to local regions in an image. As a result, adaptive equalization allows contrast to be enhanced to
-different extents in different regions based on the regions' intensity histograms. For more information about adaptive
-equalization, please see
-[https://en.wikipedia.org/wiki/Adaptive_histogram_equalization](https://en.wikipedia.org/wiki/Adaptive_histogram_equalization).
-
-The `filter_adaptive_equalization()` function utilizes the scikit-image contrast limited adaptive histogram
-equalization (CLAHE) implementation. Below, we apply adaptive equalization to the grayscale image and display both
-the grayscale image and the image after adaptive equalization for comparison.
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-grayscale = filter.filter_rgb_to_grayscale(rgb)
-util.display_img(grayscale, "Grayscale")
-adaptive_equ = filter.filter_adaptive_equalization(grayscale)
-util.display_img(adaptive_equ, "Adaptive Equalization")
-```
-
-| **Grayscale Filter** | **Adaptive Equalization Filter** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-In the console output, we can see that adaptive equalization is more compute-intensive than contrast stretching and
-histogram equalization.
-
-```
-RGB | Time: 0:00:00.167076 Type: uint8 Shape: (1385, 1810, 3)
-Gray | Time: 0:00:00.106797 Type: uint8 Shape: (1385, 1810)
-Adapt Equalization | Time: 0:00:00.223172 Type: uint8 Shape: (1385, 1810)
-```
-
-
-#### Color
-
-The WSI tissue samples in the training dataset have been H&E stained. Eosin stains basic structures such as
-most cytoplasm proteins with a pink tone. Hematoxylin stains acidic structures such as DNA and RNA with a purple
-tone. This means that cells tend to be stained pink, and particular areas of the cells such as the nuclei tend to be
-stained purple. However, note that appearance can vary greatly based on the types of cells that are stained and the
-amounts of stain applied.
-
-As an example of staining differences, below we see a slide that has pink and purple staining next to another slide
-where all tissue appears purple.
-
-| **Pink and Purple Slide** | **Purple Slide** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-Another factor regarding color is that many slides have been marked with red, green, and blue pens. Whereas in general
-we would like our filters to include pink and purple colors, since these typically indicate stained tissue, we would
-like our filters to exclude red, green, and blue colors, since these typically indicate pen marks on the slides which
-are not tissue.
-
-Below, we see an example of a slide that has been marked with red pen and some green pen.
-
-| **Slide Marked with Red and Green Pen** |
-| -------------------- |
-|  |
-
-
-Developing color filters that can be used to filter tissue areas can be fairly challenging for a variety of reasons,
-including:
-
-1. Filters need to be general enough to work across all slides in the dataset.
-2. Filters should handle issues such as variations in shadows and lighting.
-3. The amount of H&E (purple and pink) staining can vary greatly from slide to slide.
-4. Pen mark colors (red, green, and blue) vary due to issues such as lighting and pen marks over tissue.
-5. There can be color overlap between stained tissue and pen marks, so we need to balance how aggressively stain
-colors are inclusively filtered and how pen colors are exclusively filtered.
-
-
-##### RGB to HED
-
-The scikit-image `skimage.color` package features an `rgb2hed()` function that performs color deconvolution on the
-original RGB image to create HED (Hematoxylin, Eosin, Diaminobenzidine) channels. The `filter_rgb_to_hed()` function
-encapsulates `rgb2hed()`. The `filter_hed_to_hematoxylin()` and `filter_hed_to_eosin()` functions read the hematoxylin
-and eosin channels and rescale the resulting 2-dimensional NumPy arrays (for example, 0 to 255 for `uint8`)
-to increase contrast.
-
-Here, we'll convert the RGB image to an HED image. We'll then obtain the hematoxylin and eosin channels and display
-the resulting images.
-
-```
-img_path = slide.get_training_image_path(4)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-hed = filter.filter_rgb_to_hed(rgb)
-hema = filter.filter_hed_to_hematoxylin(hed)
-util.display_img(hema, "Hematoxylin Channel")
-eosin = filter.filter_hed_to_eosin(hed)
-util.display_img(eosin, "Eosin Channel")
-```
-
-Notice that the hematoxylin channel does fairly well at detecting the purple areas of the original slide,
-which could potentially be used to narrow in on tissue with cell nuclei and thus on regions that can be inspected for
-mitoses. Both the hematoxylin and eosin channel filters include the background in the resulting image, which is
-rather unfortunate in terms of differentiating tissue from non-tissue. Also, notice in the eosin channel that the red
-pen is considered to be part of the eosin stain spectrum.
-
-
-| **Hematoxylin Channel** | **Eosin Channel** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-Console output:
-
-```
-RGB | Time: 0:00:00.397570 Type: uint8 Shape: (2594, 2945, 3)
-RGB to HED | Time: 0:00:01.322220 Type: uint8 Shape: (2594, 2945, 3)
-HED to Hematoxylin | Time: 0:00:00.136750 Type: uint8 Shape: (2594, 2945)
-HED to Eosin | Time: 0:00:00.086537 Type: uint8 Shape: (2594, 2945)
-```
-
-
-##### Green Channel Filter
-
-If we look at an RGB color wheel, we see that purple and pink are next to each other. On the other side of the color wheel,
-we have yellow and green. Since green is one of our 3 NumPy array RGB color channels, filtering out pixels that have a
-high green channel value can be one way to potentially filter out parts of the slide that are not pink or purple. This
-includes the white background, since white also has a high green channel value along with high red and blue channel
-values.
-
-We'll use the default green threshold value of 200 for the `filter_green_channel()` function, meaning that any pixels
-with green channel values of 200 or greater will be rejected.
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "RGB")
-not_green = filter.filter_green_channel(rgb)
-util.display_img(not_green, "Green Channel Filter")
-```
-
-The green channel filter does a decent job of differentiating the tissue from the white background. However, notice
-that the shadow area at the top of the slide is not excluded by the filter.
-
-| **Original Slide** | **Green Channel Filter** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-A filter such as the green channel filter most likely would be used in conjunction with other filters for masking
-purposes. As a result, the default output type for the green channel filter is `bool`, as we see in the console
-output. If another output type is desired, this can be set with the function's `output_type` parameter.
-
-```
-RGB | Time: 0:00:00.169249 Type: uint8 Shape: (1385, 1810, 3)
-Filter Green Channel | Time: 0:00:00.005618 Type: bool Shape: (1385, 1810)
-```
-
-
-##### Grays Filter
-
-Next, let's utilize a filter that can filter out the annoying shadow area at the top of slide #2. Notice that the
-shadow area consists of a gradient of dark-to-light grays. A gray pixel has red, green, and blue channel values that
-are close together. The `filter_grays()` function filters out pixels that have red, blue, and green values that
-are within a certain tolerance of each other. The default tolerance for `filter_grays()` is 15. The grays filter
-also filters out white and black pixels, since they have similar red, green, and blue values.
-
-Here, we run the grays filter on the original RGB image.
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "RGB")
-not_grays = filter.filter_grays(rgb)
-util.display_img(not_grays, "Grays Filter")
-```
-
-Notice that in addition to filtering out the white background, the grays filter has indeed filtered out the shadow
-area at the top of the slide.
-
-| **Original Slide** | **Grays Filter** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-Like the green channel filter, the default type of the returned array is `bool` since the grays filter will typically
-be used in combination with other filters. Since the grays filter is fast, it offers a potentially
-low-cost way to filter out shadows from the slides during preprocessing.
-
-```
-RGB | Time: 0:00:00.169642 Type: uint8 Shape: (1385, 1810, 3)
-Filter Grays | Time: 0:00:00.082075 Type: bool Shape: (1385, 1810)
-```
-
-
-##### Red Filter
-
-Next, let's turn our attention to filtering out shades of red, which can be used to filter out a significant amount of
-the red pen color. The red pen consists of a wide variety of closely related red shades. Certain shades are
-reddish, others are maroonish, and others are pinkish, for example. These color gradations are a result of a variety of
-factors, such as the amount of ink, lighting, shadowing, and tissue under the pen marks.
-
-The `filter_red()` function filters out reddish colors through a red channel lower threshold value, a green channel
-upper threshold value, and a blue channel upper threshold value. The generated mask is based on a pixel being above
-the red channel threshold value and below the green and blue channel threshold values. One way to determine these
-values is to display the slide image in a web browser and use a tool such as the Chrome ColorPick Eyedropper to
-click on a red pen pixel to determine the approximate red, green, and blue values.
-
-In this example with slide #4, we use a red threshold value of 150, a green threshold value of 80, and a blue
-threshold value of 90. In addition, to help us visualize the filter results, we apply the red filter to the
-original RGB image as a mask and also apply the inverse of the red filter to the original image as a mask.
-
-```
-img_path = slide.get_training_image_path(4)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "RGB")
-not_red = filter.filter_red(rgb, red_lower_thresh=150, green_upper_thresh=80, blue_upper_thresh=90, display_np_info=True)
-util.display_img(not_red, "Red Filter (150, 80, 90)")
-util.display_img(util.mask_rgb(rgb, not_red), "Not Red")
-util.display_img(util.mask_rgb(rgb, ~not_red), "Red")
-```
-
-We see that the red filter filters out much of the red pen.
-
-| **Original Slide** | **Red Filter** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-Applying the red filter and the inverse of the red filter as masks to the original image, we see that our threshold
-values did quite well at filtering out a large amount of the red pen.
-
-| **Not Red** | **Red** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-Here we see the console output from the above image filtering:
-
-```
-RGB | Time: 0:00:00.404069 Type: uint8 Shape: (2594, 2945, 3)
-Filter Red | Time: 0:00:00.034864 Type: bool Shape: (2594, 2945)
-Mask RGB | Time: 0:00:00.053997 Type: uint8 Shape: (2594, 2945, 3)
-Mask RGB | Time: 0:00:00.022750 Type: uint8 Shape: (2594, 2945, 3)
-```
-
-
-##### Red Pen Filter
-
-Next, let's turn our attention to a more inclusive red pen filter that handles more shades of red. Since the
-`filter_red()` function returns a boolean array result, we can combine multiple sets of `filter_red()` threshold
-values (`red_lower_thresh`, `green_upper_thresh`, `blue_upper_thresh`) using boolean operators such as `&`. We can
-determine these values using a color picker tool such as the Chrome ColorPick Eyedropper, as mentioned previously.
-In addition to determining various shades of red pen on a single slide, shades of red pen from other slides should be
-identified and included. Note that we need to be careful with pinkish shades of red due to the similarity of these
-shades to eosin staining.
-
-Using the color picker technique, the `filter_red_pen()` function utilizes the following sets of red threshold values.
-
-```
-result = filter_red(rgb, red_lower_thresh=150, green_upper_thresh=80, blue_upper_thresh=90) & \
- filter_red(rgb, red_lower_thresh=110, green_upper_thresh=20, blue_upper_thresh=30) & \
- filter_red(rgb, red_lower_thresh=185, green_upper_thresh=65, blue_upper_thresh=105) & \
- filter_red(rgb, red_lower_thresh=195, green_upper_thresh=85, blue_upper_thresh=125) & \
- filter_red(rgb, red_lower_thresh=220, green_upper_thresh=115, blue_upper_thresh=145) & \
- filter_red(rgb, red_lower_thresh=125, green_upper_thresh=40, blue_upper_thresh=70) & \
- filter_red(rgb, red_lower_thresh=200, green_upper_thresh=120, blue_upper_thresh=150) & \
- filter_red(rgb, red_lower_thresh=100, green_upper_thresh=50, blue_upper_thresh=65) & \
- filter_red(rgb, red_lower_thresh=85, green_upper_thresh=25, blue_upper_thresh=45)
-```
-
-Let's apply the red pen filter to slide #4.
-
-```
-img_path = slide.get_training_image_path(4)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "RGB")
-not_red_pen = filter.filter_red_pen(rgb)
-util.display_img(not_red_pen, "Red Pen Filter")
-util.display_img(util.mask_rgb(rgb, not_red_pen), "Not Red Pen")
-util.display_img(util.mask_rgb(rgb, ~not_red_pen), "Red Pen")
-```
-
-| **Original Slide** | **Red Pen Filter** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-Compared with using a single set of red threshold values, we can see that the red pen filter is significantly
-more inclusive in terms of the shades of red that are accepted. As a result, more red pen is filtered. However, notice
-that some of the pinkish-red from eosin-stained tissue is also included as a result of this more aggressive filtering.
-
-
-| **Not Red Pen** | **Red Pen** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-Even though the red pen filter ANDs nine sets of red filter results together, we see that the performance is excellent.
-
-```
-RGB | Time: 0:00:00.392082 Type: uint8 Shape: (2594, 2945, 3)
-Filter Red Pen | Time: 0:00:00.251170 Type: bool Shape: (2594, 2945)
-Mask RGB | Time: 0:00:00.037256 Type: uint8 Shape: (2594, 2945, 3)
-Mask RGB | Time: 0:00:00.026589 Type: uint8 Shape: (2594, 2945, 3)
-```
-
-##### Blue Filter
-
-If we visually examine the 500 slides in the training dataset, we see that several of the slides have been marked
-with blue pen. Rather than blue lines, many of the blue marks consist of blue dots surrounding particular areas of
-interest on the slides, although this is not always the case. Some of the slides also have blue pen lines. Once again,
-the blue pen marks consist of several gradations of blue.
-
-We'll start by creating a filter to filter out blue. The `filter_blue()` function operates in a similar way as the
-`filter_red()` function. It takes a red channel upper threshold value, a green channel upper threshold value, and
-a blue channel lower threshold value. The generated mask is based on a pixel being below the red channel threshold
-value, below the green channel threshold value, and above the blue channel threshold value.
-
-Once again, we'll apply the results of the blue filter and the inverse of the blue filter as masks to the original
-RGB image to help visualize the filter results.
-
-```
-img_path = slide.get_training_image_path(241)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "RGB")
-not_blue = filter.filter_blue(rgb, red_upper_thresh=130, green_upper_thresh=155, blue_lower_thresh=180, display_np_info=True)
-util.display_img(not_blue, "Blue Filter (130, 155, 180)")
-util.display_img(util.mask_rgb(rgb, not_blue), "Not Blue")
-util.display_img(util.mask_rgb(rgb, ~not_blue), "Blue")
-```
-
-We see that a lot of the blue pen has been filtered out.
-
-| **Original Slide** | **Blue Filter** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-| **Not Blue** | **Blue** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-Console output:
-
-```
-RGB | Time: 0:00:00.432772 Type: uint8 Shape: (2058, 3240, 3)
-Filter Blue | Time: 0:00:00.029066 Type: bool Shape: (2058, 3240)
-Mask RGB | Time: 0:00:00.038966 Type: uint8 Shape: (2058, 3240, 3)
-Mask RGB | Time: 0:00:00.021153 Type: uint8 Shape: (2058, 3240, 3)
-```
-
-
-##### Blue Pen Filter
-
-In `filter_blue_pen()`, we AND together various blue shade ranges using `filter_blue()` with
-sets of red, green, and blue threshold values to create a blue pen filter that filters out various shades of blue.
-
-```
-result = filter_blue(rgb, red_upper_thresh=60, green_upper_thresh=120, blue_lower_thresh=190) & \
- filter_blue(rgb, red_upper_thresh=120, green_upper_thresh=170, blue_lower_thresh=200) & \
- filter_blue(rgb, red_upper_thresh=175, green_upper_thresh=210, blue_lower_thresh=230) & \
- filter_blue(rgb, red_upper_thresh=145, green_upper_thresh=180, blue_lower_thresh=210) & \
- filter_blue(rgb, red_upper_thresh=37, green_upper_thresh=95, blue_lower_thresh=160) & \
- filter_blue(rgb, red_upper_thresh=30, green_upper_thresh=65, blue_lower_thresh=130) & \
- filter_blue(rgb, red_upper_thresh=130, green_upper_thresh=155, blue_lower_thresh=180) & \
- filter_blue(rgb, red_upper_thresh=40, green_upper_thresh=35, blue_lower_thresh=85) & \
- filter_blue(rgb, red_upper_thresh=30, green_upper_thresh=20, blue_lower_thresh=65) & \
- filter_blue(rgb, red_upper_thresh=90, green_upper_thresh=90, blue_lower_thresh=140) & \
- filter_blue(rgb, red_upper_thresh=60, green_upper_thresh=60, blue_lower_thresh=120) & \
- filter_blue(rgb, red_upper_thresh=110, green_upper_thresh=110, blue_lower_thresh=175)
-```
-
-We apply the filter and its inverse to the original slide to help us visualize the results.
-
-```
-img_path = slide.get_training_image_path(241)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "RGB")
-not_blue_pen = filter.filter_blue_pen(rgb)
-util.display_img(not_blue_pen, "Blue Pen Filter")
-util.display_img(util.mask_rgb(rgb, not_blue_pen), "Not Blue Pen")
-util.display_img(util.mask_rgb(rgb, ~not_blue_pen), "Blue Pen")
-```
-
-For this slide, we see that `filter_blue_pen()` filters out more blue than the previous `filter_blue()` example.
-
-| **Original Slide** | **Blue Pen Filter** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-| **Not Blue Pen** | **Blue Pen** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-We see from the console output that the blue pen filter is quite fast.
-
-```
-RGB | Time: 0:00:00.348514 Type: uint8 Shape: (2058, 3240, 3)
-Filter Blue Pen | Time: 0:00:00.288286 Type: bool Shape: (2058, 3240)
-Mask RGB | Time: 0:00:00.033348 Type: uint8 Shape: (2058, 3240, 3)
-Mask RGB | Time: 0:00:00.019622 Type: uint8 Shape: (2058, 3240, 3)
-```
-
-As an aside, we can quantify the differences in filtering between the `filter_blue()` and `filter_blue_pen()`
-results.
-
-```
-not_blue = filter.filter_blue(rgb, red_upper_thresh=130, green_upper_thresh=155, blue_lower_thresh=180, display_np_info=True)
-not_blue_pen = filter.filter_blue_pen(rgb)
-print("filter_blue: " + filter.mask_percentage_text(filter.mask_percent(not_blue)))
-print("filter_blue_pen: " + filter.mask_percentage_text(filter.mask_percent(not_blue_pen)))
-```
-
-The `filter_blue()` example filters out 0.45% of the slide pixels and the `filter_blue_pen()` example filters out
-0.69% of the slide pixels.
-
-```
-filter_blue: 0.45%
-filter_blue_pen: 0.69%
-```
-
-##### Green Filter
-
-We utilize the `filter_green()` function to filter green color shades. Using a color picker tool,
-if we examine the green pen marks on the slides, the green and blue channel
-values for pixels appear to track together. As a result of this, this function has a red channel upper
-threshold value, a green channel lower threshold value, and a blue channel lower threshold value.
-
-```
-img_path = slide.get_training_image_path(51)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "RGB")
-not_green = filter.filter_green(rgb, red_upper_thresh=150, green_lower_thresh=160, blue_lower_thresh=140, display_np_info=True)
-util.display_img(not_green, "Green Filter (150, 160, 140)")
-util.display_img(util.mask_rgb(rgb, not_green), "Not Green")
-util.display_img(util.mask_rgb(rgb, ~not_green), "Green")
-```
-
-Using a red upper threshold of 150, a green lower threshold of 160, and a blue lower threshold of 140, we see that
-much of the green ink above the background is filtered out, but most of the green ink above the tissue is not filtered
-out.
-
-| **Original Slide** | **Green Filter** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-| **Not Green** | **Green** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-Console output:
-
-```
-RGB | Time: 0:00:00.611914 Type: uint8 Shape: (2291, 3839, 3)
-Filter Green | Time: 0:00:00.077429 Type: bool Shape: (2291, 3839)
-Mask RGB | Time: 0:00:00.049026 Type: uint8 Shape: (2291, 3839, 3)
-Mask RGB | Time: 0:00:00.027211 Type: uint8 Shape: (2291, 3839, 3)
-```
-
-##### Green Pen Filter
-
-To handle the green pen shades, the `filter_green_pen()` function combines different shade results using sets of
-red, green, and blue threshold values passed to the `filter_green()` function.
-
-```
-result = filter_green(rgb, red_upper_thresh=150, green_lower_thresh=160, blue_lower_thresh=140) & \
- filter_green(rgb, red_upper_thresh=70, green_lower_thresh=110, blue_lower_thresh=110) & \
- filter_green(rgb, red_upper_thresh=45, green_lower_thresh=115, blue_lower_thresh=100) & \
- filter_green(rgb, red_upper_thresh=30, green_lower_thresh=75, blue_lower_thresh=60) & \
- filter_green(rgb, red_upper_thresh=195, green_lower_thresh=220, blue_lower_thresh=210) & \
- filter_green(rgb, red_upper_thresh=225, green_lower_thresh=230, blue_lower_thresh=225) & \
- filter_green(rgb, red_upper_thresh=170, green_lower_thresh=210, blue_lower_thresh=200) & \
- filter_green(rgb, red_upper_thresh=20, green_lower_thresh=30, blue_lower_thresh=20) & \
- filter_green(rgb, red_upper_thresh=50, green_lower_thresh=60, blue_lower_thresh=40) & \
- filter_green(rgb, red_upper_thresh=30, green_lower_thresh=50, blue_lower_thresh=35) & \
- filter_green(rgb, red_upper_thresh=65, green_lower_thresh=70, blue_lower_thresh=60) & \
- filter_green(rgb, red_upper_thresh=100, green_lower_thresh=110, blue_lower_thresh=105) & \
- filter_green(rgb, red_upper_thresh=165, green_lower_thresh=180, blue_lower_thresh=180) & \
- filter_green(rgb, red_upper_thresh=140, green_lower_thresh=140, blue_lower_thresh=150) & \
- filter_green(rgb, red_upper_thresh=185, green_lower_thresh=195, blue_lower_thresh=195)
-```
-
-If we apply the green pen filter, we see that it includes most of the green shades above the tissue in slide 51.
-
-```
-img_path = slide.get_training_image_path(51)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "RGB")
-not_green_pen = filter.filter_green_pen(rgb)
-util.display_img(not_green_pen, "Green Pen Filter")
-util.display_img(util.mask_rgb(rgb, not_green_pen), "Not Green Pen")
-util.display_img(util.mask_rgb(rgb, ~not_green_pen), "Green Pen")
-```
-
-| **Original Slide** | **Green Pen Filter** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-| **Not Green Pen** | **Green Pen** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-Like the other pen filters, the green pen filter's performance is quite good.
-
-```
-RGB | Time: 0:00:00.540223 Type: uint8 Shape: (2291, 3839, 3)
-Filter Green Pen | Time: 0:00:00.487728 Type: bool Shape: (2291, 3839)
-Mask RGB | Time: 0:00:00.044024 Type: uint8 Shape: (2291, 3839, 3)
-Mask RGB | Time: 0:00:00.022867 Type: uint8 Shape: (2291, 3839, 3)
-```
-
-
-##### K-Means Segmentation
-
-The scikit-image library contains functionality that allows for image segmentation using k-means clustering based
-on location and color. This allows regions of similarly colored pixels to be grouped together. These regions are
-colored based on the average color of the pixels in the individual regions. This could potentially be used to filter
-regions based on their colors, where we could filter on pink shades for eosin-stained tissue and purple shades for
-hematoxylin-stained tissue.
-
-The `filter_kmeans_segmentation()` function has a default value of 800 segments. We'll increase this to 3000 using
-the `n_segments` parameter. In the example below, we'll perform k-means segmentation on the original image. In
-addition, we'll create a threshold using Otsu's method and apply the resulting mask to the original image. We'll then
-perform k-means segmentation on that image.
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "Original", bg=True)
-kmeans_seg = filter.filter_kmeans_segmentation(rgb, n_segments=3000)
-util.display_img(kmeans_seg, "K-Means Segmentation", bg=True)
-otsu_mask = util.mask_rgb(rgb, filter.filter_otsu_threshold(filter.filter_complement(filter.filter_rgb_to_grayscale(rgb)), output_type="bool"))
-util.display_img(otsu_mask, "Image after Otsu Mask", bg=True)
-kmeans_seg_otsu = filter.filter_kmeans_segmentation(otsu_mask, n_segments=3000)
-util.display_img(kmeans_seg_otsu, "K-Means Segmentation after Otsu Mask", bg=True)
-```
-
-
-| **Original Slide** | **K-Means Segmentation** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-| **Image after Otsu Mask** | **K-Means Segmentation after Otsu Mask** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-Note that there are a couple practical difficulties in terms of implementing automated tissue detection using k-means
-segmentation. To begin with, due to the variation in tissue stain colors across the image dataset, it can be difficult
-to filter on "pinkish" and "purplish" colors across all the slides. In addition, the k-means segmentation technique
-is very computationally expensive, as we can see in the console output. The compute time increases with the number
-of segments. For 3000 segments, we have a filter time of ~20 seconds, whereas all operations that we have seen up to
-this point are subsecond. If we use the default value of 800 segments, compute time for the k-means segmentation filter
-is ~7 seconds.
-
-```
-RGB | Time: 0:00:00.172848 Type: uint8 Shape: (1385, 1810, 3)
-K-Means Segmentation | Time: 0:00:20.238886 Type: uint8 Shape: (1385, 1810, 3)
-Gray | Time: 0:00:00.076287 Type: uint8 Shape: (1385, 1810)
-Complement | Time: 0:00:00.000374 Type: uint8 Shape: (1385, 1810)
-Otsu Threshold | Time: 0:00:00.013864 Type: bool Shape: (1385, 1810)
-Mask RGB | Time: 0:00:00.008522 Type: uint8 Shape: (1385, 1810, 3)
-K-Means Segmentation | Time: 0:00:20.130044 Type: uint8 Shape: (1385, 1810, 3)
-```
-
----
-
-The scikit-image library also makes it possible to combine similarly colored regions. One way to do this with the
-k-means segmentation results is to build a region adjacency graph (RAG) and combine regions based on a threshold value.
-The `filter_rag_threshold()` function performs k-means segmentation, builds the RAG, and allows us to pass in the RAG
-threshold value.
-
-Here, we perform k-means segmentation, build a RAG, and apply different RAG thresholds to combine similar regions.
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "Original", bg=True)
-rag_thresh = filter.filter_rag_threshold(rgb)
-util.display_img(rag_thresh, "RAG Threshold (9)", bg=True)
-rag_thresh = filter.filter_rag_threshold(rgb, threshold=1)
-util.display_img(rag_thresh, "RAG Threshold (1)", bg=True)
-rag_thresh = filter.filter_rag_threshold(rgb, threshold=20)
-util.display_img(rag_thresh, "RAG Threshold (20)", bg=True)
-```
-
-| **Original Slide** | **RAG Threshold = 9** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-| **RAG Threshold = 1** | **RAG Threshold = 20** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-Even using the default of 800 segments for the k-means segmentation, we see that this technique is very
-computationally expensive.
-
-```
-RGB | Time: 0:00:00.462239 Type: uint8 Shape: (1385, 1810, 3)
-RAG Threshold | Time: 0:00:24.677776 Type: uint8 Shape: (1385, 1810, 3)
-RAG Threshold | Time: 0:00:26.683581 Type: uint8 Shape: (1385, 1810, 3)
-RAG Threshold | Time: 0:00:23.774296 Type: uint8 Shape: (1385, 1810, 3)
-```
-
-
-##### RGB to HSV
-
-Comparing hematoxylin and eosin staining can be challenging in the RGB color space. One way to simplify
-this comparison is to convert to a different color space such as HSV (Hue-Saturation-Value).
-The scikit-image `skimage.color` package features an `rgb2hsv()` function that converts an RGB image
-to an HSV image. The `filter_rgb_to_hsv()` function wraps this scikit-image function.
-In the HSV color model, the hue is represented by 360 degrees. Purple has a hue of 270 and
-pink has a hue of 330. We discuss hematoxylin and eosin stain comparison in our later discussion
-of tile scoring, where we favor hematoxylin-stained tissue over eosin-stained tissue.
-
-As an example, in the `wsi/tiles.py` file, the `display_image_with_rgb_and_hsv_histograms()`
-function takes in an image as a NumPy array in RGB color space and displays the image
-along with its RGB and HSV histograms. Internally, this function utilizes the `filter_rgb_to_hsv()`
-function.
-
-
-```
-# To get around renderer issue on OSX going from Matplotlib image to NumPy image.
-import matplotlib
-matplotlib.use('Agg')
-
-from deephistopath.wsi import slide
-from deephistopath.wsi import tiles
-from deephistopath.wsi import util
-
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-tiles.display_image_with_rgb_and_hsv_histograms(rgb)
-```
-
-Here we see slide #2 along with its RGB and HSV histograms. Notice that the HSV hue histogram
-columns have additionally been colored based on their corresponding hue values to aid in
-visual inspection.
-
-| **Slide 2 RGB and HSV Histograms** |
-| -------------------- |
-|  |
-
-
-#### Morphology
-
-Information about image morphology can be found at
-[https://en.wikipedia.org/wiki/Mathematical_morphology](https://en.wikipedia.org/wiki/Mathematical_morphology).
-The primary morphology operators are erosion, dilation, opening, and closing. With erosion, pixels along the edges
-of an object are removed. With dilation, pixels along the edges of an object are added. Opening is erosion followed
-by dilation. Closing is dilation followed by erosion. With morphology operators, a structuring element (such as
-a square, circle, cross, etc) is passed along the edges of the objects to perform the operations. Morphology operators
-are typically performed on binary and grayscale images. In our examples, we apply morphology operators to binary
-images (2-dimensional arrays of 2 values, such as True/False, 1.0/0.0, and 255/0).
-
-
-##### Erosion
-
-Let's have a look at an erosion example.
-We create a binary image by calling the `filter_grays()` function on the original RGB image. The
-`filter_binary_erosion()` function uses a disk as the structuring element that erodes the edges of the
-"No Grays" binary image. We demonstrate erosion with disk structuring elements of radius 5 and radius 20.
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "Original", bg=True)
-no_grays = filter.filter_grays(rgb, output_type="bool")
-util.display_img(no_grays, "No Grays", bg=True)
-bin_erosion_5 = filter.filter_binary_erosion(no_grays, disk_size=5)
-util.display_img(bin_erosion_5, "Binary Erosion (5)", bg=True)
-bin_erosion_20 = filter.filter_binary_erosion(no_grays, disk_size=20)
-util.display_img(bin_erosion_20, "Binary Erosion (20)", bg=True)
-```
-
-| **Original Slide** | **No Grays** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-| **Binary Erosion (disk_size = 5)** | **Binary Erosion (disk_size = 20)** |
-| -------------------- | --------------------------------- |
-| ") | ") |
-
-
-Notice that increasing the structuring element radius increases the compute time.
-
-```
-RGB | Time: 0:00:00.171309 Type: uint8 Shape: (1385, 1810, 3)
-Filter Grays | Time: 0:00:00.086484 Type: bool Shape: (1385, 1810)
-Binary Erosion | Time: 0:00:00.167290 Type: uint8 Shape: (1385, 1810)
-Binary Erosion | Time: 0:00:00.765442 Type: uint8 Shape: (1385, 1810)
-```
-
-
-##### Dilation
-
-The `filter_binary_dilation()` function utilizes a disk structuring element in a similar manner as the corresponding
-erosion function. We'll utilize the same "No Grays" binary image from the previous example and dilate the image
-utilizing a disk radius of 5 pixels followed by a disk radius of 20 pixels.
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "Original", bg=True)
-no_grays = filter.filter_grays(rgb, output_type="bool")
-util.display_img(no_grays, "No Grays", bg=True)
-bin_dilation_5 = filter.filter_binary_dilation(no_grays, disk_size=5)
-util.display_img(bin_dilation_5, "Binary Dilation (5)", bg=True)
-bin_dilation_20 = filter.filter_binary_dilation(no_grays, disk_size=20)
-util.display_img(bin_dilation_20, "Binary Dilation (20)", bg=True)
-```
-
-We see that dilation expands the edges of the binary image as opposed to the erosion, which shrinks the edges.
-
-| **Binary Dilation (disk_size = 5)** | **Binary Dilation (disk_size = 20)** |
-| -------------------- | --------------------------------- |
-| ") | ") |
-
-
-Console output:
-
-```
-RGB | Time: 0:00:00.176491 Type: uint8 Shape: (1385, 1810, 3)
-Filter Grays | Time: 0:00:00.081817 Type: bool Shape: (1385, 1810)
-Binary Dilation | Time: 0:00:00.096302 Type: uint8 Shape: (1385, 1810)
-Binary Dilation | Time: 0:00:00.538761 Type: uint8 Shape: (1385, 1810)
-```
-
-
-##### Opening
-
-As mentioned, opening is erosion followed by dilation. Opening can be used to remove small foreground objects.
-
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "Original", bg=True)
-no_grays = filter.filter_grays(rgb, output_type="bool")
-util.display_img(no_grays, "No Grays", bg=True)
-bin_opening_5 = filter.filter_binary_opening(no_grays, disk_size=5)
-util.display_img(bin_opening_5, "Binary Opening (5)", bg=True)
-bin_opening_20 = filter.filter_binary_opening(no_grays, disk_size=20)
-util.display_img(bin_opening_20, "Binary Opening (20)", bg=True)
-```
-
-| **Binary Opening (disk_size = 5)** | **Binary Opening (disk_size = 20)** |
-| -------------------- | --------------------------------- |
-| ") | ") |
-
-
-Opening is a fairly expensive operation, since it is an erosion followed by a dilation. The compute time increases
-with the size of the structuring element. The 5-pixel disk radius for the structuring element results in a 0.25s
-operation, whereas the 20-pixel disk radius results in a 2.45s operation.
-
-```
-RGB | Time: 0:00:00.169241 Type: uint8 Shape: (1385, 1810, 3)
-Filter Grays | Time: 0:00:00.085474 Type: bool Shape: (1385, 1810)
-Binary Opening | Time: 0:00:00.248629 Type: uint8 Shape: (1385, 1810)
-Binary Opening | Time: 0:00:02.452089 Type: uint8 Shape: (1385, 1810)
-```
-
-
-##### Closing
-
-Closing is a dilation followed by an erosion. Closing can be used to remove small background holes.
-
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "Original", bg=True)
-no_grays = filter.filter_grays(rgb, output_type="bool")
-util.display_img(no_grays, "No Grays", bg=True)
-bin_closing_5 = filter.filter_binary_closing(no_grays, disk_size=5)
-util.display_img(bin_closing_5, "Binary Closing (5)", bg=True)
-bin_closing_20 = filter.filter_binary_closing(no_grays, disk_size=20)
-util.display_img(bin_closing_20, "Binary Closing (20)", bg=True)
-```
-
-| **Binary Closing (disk_size = 5)** | **Binary Closing (disk_size = 20)** |
-| -------------------- | --------------------------------- |
-| ") | ") |
-
-
-Like opening, closing is a fairly expensive operation since it performs both a dilation and an erosion. Compute time
-increases with structuring element size.
-
-```
-RGB | Time: 0:00:00.179190 Type: uint8 Shape: (1385, 1810, 3)
-Filter Grays | Time: 0:00:00.079992 Type: bool Shape: (1385, 1810)
-Binary Closing | Time: 0:00:00.241882 Type: uint8 Shape: (1385, 1810)
-Binary Closing | Time: 0:00:02.592515 Type: uint8 Shape: (1385, 1810)
-```
-
-
-##### Remove Small Objects
-
-The scikit-image `remove_small_objects()` function removes objects less than a particular minimum size. The
-`filter_remove_small_objects()` function wraps this and adds additional functionality. This can be useful for
-removing small islands of noise from images. We'll demonstrate it here with two sizes, 100 pixels and 10,000 pixels,
-and we'll perform this on the "No Grays" binary image.
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "Original", bg=True)
-no_grays = filter.filter_grays(rgb, output_type="bool")
-util.display_img(no_grays, "No Grays", bg=True)
-remove_small_100 = filter.filter_remove_small_objects(no_grays, min_size=100)
-util.display_img(remove_small_100, "Remove Small Objects (100)", bg=True)
-remove_small_10000 = filter.filter_remove_small_objects(no_grays, min_size=10000)
-util.display_img(remove_small_10000, "Remove Small Objects (10000)", bg=True)
-```
-
-Notice in the "No Grays" binary image that we see lots of scattered, small objects.
-
-| **Original Slide** | **No Grays** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-After removing small objects with a connected size less than 100 pixels, we see that the smallest objects have been
-removed from the binary image. With a minimum size of 10,000 pixels, we see that many larger objects have also been
-removed from the binary image.
-
-| **Remove Small Objects (100)** | **Remove Small Objects (10000)** |
-| -------------------- | --------------------------------- |
-| ") | ") |
-
-
-The performance of the filters to remove small objects is quite fast.
-
-```
-RGB | Time: 0:00:00.177367 Type: uint8 Shape: (1385, 1810, 3)
-Filter Grays | Time: 0:00:00.081827 Type: bool Shape: (1385, 1810)
-Remove Small Objs | Time: 0:00:00.053734 Type: uint8 Shape: (1385, 1810)
-Remove Small Objs | Time: 0:00:00.044924 Type: uint8 Shape: (1385, 1810)
-```
-
-
-##### Remove Small Holes
-
-The scikit-image `remove_small_holes()` function is similar to the `remove_small_objects()` function except it removes
-holes rather than objects from binary images. Here we demonstrate this using the `filter_remove_small_holes()`
-function with sizes of 100 pixels and 10,000 pixels.
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "Original", bg=True)
-no_grays = filter.filter_grays(rgb, output_type="bool")
-util.display_img(no_grays, "No Grays", bg=True)
-remove_small_100 = filter.filter_remove_small_holes(no_grays, min_size=100)
-util.display_img(remove_small_100, "Remove Small Holes (100)", bg=True)
-remove_small_10000 = filter.filter_remove_small_holes(no_grays, min_size=10000)
-util.display_img(remove_small_10000, "Remove Small Holes (10000)", bg=True)
-```
-
-Notice that using a minimum size of 10,000 removes more holes than a size of 100, as we would expect.
-
-| **Remove Small Holes (100)** | **Remove Small Holes (10000)** |
-| -------------------- | --------------------------------- |
-| ") | ") |
-
-
-Console output:
-
-```
-RGB | Time: 0:00:00.171669 Type: uint8 Shape: (1385, 1810, 3)
-Filter Grays | Time: 0:00:00.081116 Type: bool Shape: (1385, 1810)
-Remove Small Holes | Time: 0:00:00.043491 Type: uint8 Shape: (1385, 1810)
-Remove Small Holes | Time: 0:00:00.044550 Type: uint8 Shape: (1385, 1810)
-```
-
-
-##### Fill Holes
-
-The SciPy `binary_fill_holes()` function (from `scipy.ndimage`) is similar to the `remove_small_holes()` function. Using its default
-settings, it generates results similar but typically not identical to `remove_small_holes()` with a high minimum
-size value.
-
-Here, we'll display the result of `filter_binary_fill_holes()` on the image after gray shades have been removed. After
-this, we'll perform exclusive-or operations to look at the differences between "Fill Holes" and "Remove Small Holes"
-with size values of 100 and 10,000.
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "Original", bg=True)
-no_grays = filter.filter_grays(rgb, output_type="bool")
-fill_holes = filter.filter_binary_fill_holes(no_grays)
-util.display_img(fill_holes, "Fill Holes", bg=True)
-
-remove_holes_100 = filter.filter_remove_small_holes(no_grays, min_size=100, output_type="bool")
-util.display_img(fill_holes ^ remove_holes_100, "Differences between Fill Holes and Remove Small Holes (100)", bg=True)
-
-remove_holes_10000 = filter.filter_remove_small_holes(no_grays, min_size=10000, output_type="bool")
-util.display_img(fill_holes ^ remove_holes_10000, "Differences between Fill Holes and Remove Small Holes (10000)", bg=True)
-
-```
-
-| **Original Slide** | **Fill Holes** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-In this example, increasing the minimum small hole size results in fewer differences between "Fill Holes" and
-"Remove Small Holes".
-
-| **Differences between Fill Holes and Remove Small Holes (100)** | **Differences between Fill Holes and Remove Small Holes (10000)** |
-| -------------------- | --------------------------------- |
-| ") | ") |
-
-
-Console output:
-
-```
-RGB | Time: 0:00:00.176696 Type: uint8 Shape: (1385, 1810, 3)
-Filter Grays | Time: 0:00:00.082582 Type: bool Shape: (1385, 1810)
-Binary Fill Holes | Time: 0:00:00.069583 Type: bool Shape: (1385, 1810)
-Remove Small Holes | Time: 0:00:00.046232 Type: bool Shape: (1385, 1810)
-Remove Small Holes | Time: 0:00:00.044539 Type: bool Shape: (1385, 1810)
-```
-
-
-#### Entropy
-
-The scikit-image `entropy()` function allows us to filter images based on complexity. Since areas such as slide
-backgrounds are less complex than areas of interest such as cell nuclei, filtering on entropy offers interesting
-possibilities for tissue identification.
-
-Here, we use the `filter_entropy()` function to filter the grayscale image based on entropy. We display
-the resulting binary image. After that, we mask the original image with the entropy mask and the inverse of the entropy
-mask.
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "Original")
-gray = filter.filter_rgb_to_grayscale(rgb)
-util.display_img(gray, "Grayscale")
-entropy = filter.filter_entropy(gray, output_type="bool")
-util.display_img(entropy, "Entropy")
-util.display_img(util.mask_rgb(rgb, entropy), "Original with Entropy Mask")
-util.display_img(util.mask_rgb(rgb, ~entropy), "Original with Inverse of Entropy Mask")
-```
-
-| **Original Slide** | **Grayscale** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-| **Entropy Filter** |
-| ------------------ |
-|  |
-
-
-The results of the original image with the inverse of the entropy mask are particularly interesting. Notice that much
-of the white background including the shadow region at the top of the slide has been filtered out. Additionally, notice
-that for the stained regions, a significant amount of the pink eosin-stained area has been filtered out while a
-smaller proportion of the purple-stained hematoxylin area has been filtered out. This makes sense since hematoxylin
-stains regions such as cell nuclei, which are structures with significant complexity. Therefore, entropy seems
-like a potential tool that could be used to identify regions of interest where mitoses are occurring.
-
-
-| **Original with Entropy Mask** | **Original with Inverse of Entropy Mask** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-A drawback of using entropy is that its computation is significant. The entropy filter takes over 3 seconds to run
-in this example.
-
-```
-RGB | Time: 0:00:00.177166 Type: uint8 Shape: (1385, 1810, 3)
-Gray | Time: 0:00:00.116245 Type: uint8 Shape: (1385, 1810)
-Entropy | Time: 0:00:03.306786 Type: bool Shape: (1385, 1810)
-Mask RGB | Time: 0:00:00.010422 Type: uint8 Shape: (1385, 1810, 3)
-Mask RGB | Time: 0:00:00.006140 Type: uint8 Shape: (1385, 1810, 3)
-```
-
-
-#### Canny Edge Detection
-
-Edges in images are areas where there is typically a significant, abrupt change in image brightness.
-The Canny edge detection algorithm is implemented in scikit-image. More information about
-edge detection can be found at [https://en.wikipedia.org/wiki/Edge_detection](https://en.wikipedia.org/wiki/Edge_detection).
-More information about Canny edge detection can be found at
-[https://en.wikipedia.org/wiki/Canny_edge_detector](https://en.wikipedia.org/wiki/Canny_edge_detector).
-
-The scikit-image `canny()` function returns a binary edge map for the detected edges in an input image. In the
-example below, we call `filter_canny()` on the grayscale image and display the resulting Canny edges.
-After this, we crop a 600x600 area of the original slide and display it. We apply the inverse of the
-canny mask to the cropped original slide area and display it for comparison.
-
-```
-img_path = slide.get_training_image_path(2)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "Original", bg=True)
-gray = filter.filter_rgb_to_grayscale(rgb)
-canny = filter.filter_canny(gray, output_type="bool")
-util.display_img(canny, "Canny", bg=True)
-rgb_crop = rgb[300:900, 300:900]
-canny_crop = canny[300:900, 300:900]
-util.display_img(rgb_crop, "Original", size=24, bg=True)
-util.display_img(util.mask_rgb(rgb_crop, ~canny_crop), "Original with ~Canny Mask", size=24, bg=True)
-```
-
-| **Original** | **Canny Edges** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-By applying the inverse of the canny edge mask to the original image, the detected edges are colored black. This
-visually accentuates the different structures in the slide.
-
-| **Cropped Original** | **Cropped Original with Inverse Canny Edges Mask** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-In the console output, we see that Canny edge detection is fairly expensive, since its computation took over 1 second.
-
-```
-RGB | Time: 0:00:00.174458 Type: uint8 Shape: (1385, 1810, 3)
-Gray | Time: 0:00:00.116023 Type: uint8 Shape: (1385, 1810)
-Canny Edges | Time: 0:00:01.017241 Type: bool Shape: (1385, 1810)
-Mask RGB | Time: 0:00:00.001443 Type: uint8 Shape: (600, 600, 3)
-```
-
-
-### Combining Filters
-
-Since our image filters utilize NumPy arrays, it is straightforward to combine our filters. For example, when
-we have filters that return boolean images for masking, we can use standard boolean algebra on our arrays to perform
-operations such as AND, OR, XOR, and NOT. We can also run filters on the results of other filters.
-
-As an example, here we run our green pen and blue pen filters on the original RGB image to filter out the green and
-blue pen marks on the slide. We combine the resulting masks with a boolean AND (&) operation. We display the resulting
-mask and this mask applied to the original image, masking out the green and blue pen marks from the image.
-
-```
-img_path = slide.get_training_image_path(74)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "Original")
-no_green_pen = filter.filter_green_pen(rgb)
-util.display_img(no_green_pen, "No Green Pen")
-no_blue_pen = filter.filter_blue_pen(rgb)
-util.display_img(no_blue_pen, "No Blue Pen")
-no_gp_bp = no_green_pen & no_blue_pen
-util.display_img(no_gp_bp, "No Green Pen, No Blue Pen")
-util.display_img(util.mask_rgb(rgb, no_gp_bp), "Original with No Green Pen, No Blue Pen")
-```
-
-| **Original Slide** |
-| -------------------- |
-|  |
-
-| **No Green Pen** | **No Blue Pen** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-| **No Green Pen, No Blue Pen** | **Original with No Green Pen, No Blue Pen** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-Console Output:
-
-```
-RGB | Time: 0:00:00.525283 Type: uint8 Shape: (2592, 3509, 3)
-Filter Green Pen | Time: 0:00:00.562343 Type: bool Shape: (2592, 3509)
-Filter Blue Pen | Time: 0:00:00.414910 Type: bool Shape: (2592, 3509)
-Mask RGB | Time: 0:00:00.054763 Type: uint8 Shape: (2592, 3509, 3)
-```
-
-
----
-
-Let's try another combination of filters that should give us fairly good tissue segmentation for this slide,
-where the slide background and blue and green pen marks are removed. We can do this for this slide by ANDing
-together the "No Grays" filter, the "Green Channel" filter, the "No Green Pen" filter, and the "No Blue Pen" filter.
-In addition, we can use our "Remove Small Objects" filter to remove small islands from the mask. We display the
-resulting mask. We apply this mask and the inverse of the mask to the original image to visually see which parts of the
-slide are passed through and which parts are masked out.
-
-```
-img_path = slide.get_training_image_path(74)
-img = slide.open_image(img_path)
-rgb = util.pil_to_np_rgb(img)
-util.display_img(rgb, "Original")
-mask = filter.filter_grays(rgb) & filter.filter_green_channel(rgb) & filter.filter_green_pen(rgb) & filter.filter_blue_pen(rgb)
-mask = filter.filter_remove_small_objects(mask, min_size=100, output_type="bool")
-util.display_img(mask, "No Grays, Green Channel, No Green Pen, No Blue Pen, No Small Objects")
-util.display_img(util.mask_rgb(rgb, mask), "Original with No Grays, Green Channel, No Green Pen, No Blue Pen, No Small Objects")
-util.display_img(util.mask_rgb(rgb, ~mask), "Original with Inverse Mask")
-```
-
-| **Original Slide** | **No Grays, Green Channel, No Green Pen, No Blue Pen, No Small Objects** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-We see that this combination does a good job at allowing us to filter the most relevant tissue sections of this slide.
-
-| **Original with No Grays, Green Channel, No Green Pen, No Blue Pen, No Small Objects** | **Original with Inverse Mask** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-Console Output:
-
-```
-RGB | Time: 0:00:00.496920 Type: uint8 Shape: (2592, 3509, 3)
-Filter Grays | Time: 0:00:00.361576 Type: bool Shape: (2592, 3509)
-Filter Green Channel | Time: 0:00:00.020190 Type: bool Shape: (2592, 3509)
-Filter Green Pen | Time: 0:00:00.488955 Type: bool Shape: (2592, 3509)
-Filter Blue Pen | Time: 0:00:00.369501 Type: bool Shape: (2592, 3509)
-Remove Small Objs | Time: 0:00:00.178179 Type: bool Shape: (2592, 3509)
-Mask RGB | Time: 0:00:00.047400 Type: uint8 Shape: (2592, 3509, 3)
-Mask RGB | Time: 0:00:00.048710 Type: uint8 Shape: (2592, 3509, 3)
-```
-
-
----
-
-In the `wsi/filter.py` file, the `apply_filters_to_image(slide_num, save=True, display=False)` function is the
-primary way we apply a set of filters to an image with the goal of identifying the tissue in the slide. This
-function allows us to see the results of each filter and the combined results of different filters. If the
-`save` parameter is `True`, the various filter results will be saved to the file system. If the `display`
-parameter is `True`, the filter results will be displayed on the screen. The function returns a tuple consisting of
-the resulting NumPy array image and a dictionary of information that is used elsewhere for generating an HTML page
-to view the various filter results for multiple slides, as we will see later.
-
-The `apply_filters_to_image()` function calls the `apply_image_filters()` function, which creates green channel, grays,
-red pen, green pen, and blue pen masks and combines these into a single mask using boolean ANDs.
-After this, small objects are removed from the mask.
-
-```
-mask_not_green = filter_green_channel(rgb)
-mask_not_gray = filter_grays(rgb)
-mask_no_red_pen = filter_red_pen(rgb)
-mask_no_green_pen = filter_green_pen(rgb)
-mask_no_blue_pen = filter_blue_pen(rgb)
-mask_gray_green_pens = mask_not_gray & mask_not_green & mask_no_red_pen & mask_no_green_pen & mask_no_blue_pen
-mask_remove_small = filter_remove_small_objects(mask_gray_green_pens, min_size=500, output_type="bool")
-```
-
-After each of the above masks is created, it is applied to the original image and the resulting image is saved
-to the file system, displayed to the screen, or both.
-
-Let's try this function out. In this example, we run `apply_filters_to_image()` on slide #337 and display the results
-to the screen.
-
-```
-filter.apply_filters_to_image(337, display=True, save=False)
-```
-
-Note that this function utilizes the scaled-down `png` image for slide #337. If we have not generated `png` images for
-all the slides (typically by calling `slide.multiprocess_training_slides_to_images()`), we can generate the individual
-scaled-down `png` image and then apply the filters to this image.
-
-```
-slide.training_slide_to_image(337)
-filter.apply_filters_to_image(337, display=True, save=False)
-```
-
-Here, we see the original slide #337 and the green channel filter applied to it. The original slide is marked as 0.12%
-masked because a small number of pixels in the original image are black (0 values for the red, green, and blue
-channels). Notice that the green channel filter
-with a default threshold of 200 removes most of the white background but only a relatively small fraction of the green
-pen. The green channel filter masks 72.60% of the original slide.
-
-| **Slide 337, F001** | **Slide 337, F002** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-Here, we see the results of the grays filter and the red pen filter. For this slide, the grays filter masks 68.01% of
-the slide, which is actually less than the green channel filter. The red pen filter masks only 0.18% of the slide,
-which makes sense since there are no red pen marks on the slide.
-
-| **Slide 337, F003** | **Slide 337, F004** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-The green pen filter masks 3.81% of the slide. Visually, we see that it does a decent job of masking out the green
-pen marks on the slide. The blue pen filter masks 0.12% of the slide, which is accurate since there are no blue pen
-marks on the slide.
-
-| **Slide 337, F005** | **Slide 337, F006** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-Combining the above filters with a boolean AND results in 74.57% masking. Cleaning up these results by removing small
-objects results in a masking of 76.11%. This potentially gives a good tissue segmentation that we can use for deep
-learning.
-
-| **Slide 337, F007** | **Slide 337, F008** |
-| -------------------- | --------------------------------- |
-|  |  |
-
-
-In the console, we see the slide #337 processing time takes ~12.6s in this example. The filtering is only a relatively
-small fraction of this time. If we set `display` to `False`, processing only takes ~2.3s.
-
-```
-Processing slide #337
-RGB | Time: 0:00:00.568235 Type: uint8 Shape: (2515, 3149, 3)
-Filter Green Channel | Time: 0:00:00.017670 Type: bool Shape: (2515, 3149)
-Mask RGB | Time: 0:00:00.037547 Type: uint8 Shape: (2515, 3149, 3)
-Filter Grays | Time: 0:00:00.323861 Type: bool Shape: (2515, 3149)
-Mask RGB | Time: 0:00:00.032874 Type: uint8 Shape: (2515, 3149, 3)
-Filter Red Pen | Time: 0:00:00.253547 Type: bool Shape: (2515, 3149)
-Mask RGB | Time: 0:00:00.035073 Type: uint8 Shape: (2515, 3149, 3)
-Filter Green Pen | Time: 0:00:00.395172 Type: bool Shape: (2515, 3149)
-Mask RGB | Time: 0:00:00.032597 Type: uint8 Shape: (2515, 3149, 3)
-Filter Blue Pen | Time: 0:00:00.314914 Type: bool Shape: (2515, 3149)
-Mask RGB | Time: 0:00:00.034853 Type: uint8 Shape: (2515, 3149, 3)
-Mask RGB | Time: 0:00:00.034556 Type: uint8 Shape: (2515, 3149, 3)
-Remove Small Objs | Time: 0:00:00.160241 Type: bool Shape: (2515, 3149)
-Mask RGB | Time: 0:00:00.030854 Type: uint8 Shape: (2515, 3149, 3)
-Slide #337 processing time: 0:00:12.576835
-```
-
-Since `apply_filters_to_image()` returns the resulting image as a NumPy array, we can perform further processing on
-the image. If we look at the `apply_filters_to_image()` results for slide #337, we can see that some grayish greenish
-pen marks remain on the slide. We can filter some of these out using our `filter_green()` function with different
-threshold values and our `filter_grays()` function with an increased tolerance value.
-
-We'll compare the results by cropping two regions of the slide before and after this additional processing and
-displaying all four of these regions together.
-
-```
-rgb, _ = filter.apply_filters_to_image(337, display=False, save=False)
-
-not_greenish = filter.filter_green(rgb, red_upper_thresh=125, green_lower_thresh=30, blue_lower_thresh=30, display_np_info=True)
-not_grayish = filter.filter_grays(rgb, tolerance=30)
-rgb_new = util.mask_rgb(rgb, not_greenish & not_grayish)
-
-row1 = np.concatenate((rgb[1200:1800, 150:750], rgb[1150:1750, 2050:2650]), axis=1)
-row2 = np.concatenate((rgb_new[1200:1800, 150:750], rgb_new[1150:1750, 2050:2650]), axis=1)
-result = np.concatenate((row1, row2), axis=0)
-util.display_img(result)
-```
-
-After the additional processing, we see that the pen marks in the displayed regions have been significantly reduced.
-
-| **Remove More Green and More Gray** |
-| -------------------- |
-|  |
-
-
-As another example, here we can see a summary of filters applied to a slide by `apply_filters_to_image()` and the
-resulting masked image.
-
-| **Filter Example** |
-| ------------------ |
-|  |
-
-
-### Applying Filters to Multiple Images
-
-When designing our set of tissue-selecting filters, one very important requirement is the ability to visually inspect
-the filter results across multiple slides. Ideally we should easily be able to alternate between displaying the
-results for a single image, a select subset of our training image dataset, and our entire dataset. Additionally,
-multiprocessing can result in a significant performance boost, so we should be able to multiprocess our image
-processing if desired.
-
-A simple, powerful way to visually inspect our filter results is to generate an HTML page for a set of images.
-
-The following functions in `wsi/filter.py` can be used to apply filters to multiple images:
-
-```
-apply_filters_to_image_list(image_num_list, save, display)
-apply_filters_to_image_range(start_ind, end_ind, save, display)
-singleprocess_apply_filters_to_images(save=True, display=False, html=True, image_num_list=None)
-multiprocess_apply_filters_to_images(save=True, display=False, html=True, image_num_list=None)
-
-```
-
-The `apply_filters_to_image_list()` function takes a list of image numbers for processing. It does not generate an
-HTML page but it does generate information that can be used by other functions to generate an HTML page.
-The `save` parameter if `True` will save the generated images to the file system. If the `display` parameter
-is `True`, the generated images will be displayed to the screen. If several slides are being processed,
-`display` should be set to False.
-
-The `apply_filters_to_image_range()` function is similar to `apply_filters_to_image_list()` except that rather than
-taking a list of image numbers, it takes a starting index number and ending index number for the slides in the
-training set. Like `apply_filters_to_image_list()`, the `apply_filters_to_image_range()` function has `save` and
-`display` parameters.
-
-The `singleprocess_apply_filters_to_images()` and `multiprocess_apply_filters_to_images()` functions are the
-primary functions that should be called to apply filters to multiple images. Both of these functions feature `save`
-and `display` parameters. The additional `html` parameter if `True` generates an HTML page for displaying the filter
-results on the image set. The `singleprocess_apply_filters_to_images()` and `multiprocess_apply_filters_to_images()`
-functions also feature an `image_num_list` parameter which specifies a list of image numbers that should be
-processed. If `image_num_list` is not supplied, all training images are processed.
-
-As an example, let's use a single process to apply our filters to images 1, 2, and 3. We can accomplish this with
-the following:
-
-```
-filter.singleprocess_apply_filters_to_images(image_num_list=[1, 2, 3])
-```
-
-In addition to saving the filtered images to the file system, this creates a `filters.html` file that displays all the
-filtered slide images.
-If we open the `filters.html` file in a browser, we can see 8 images displayed for each slide. Each separate slide
-is displayed as a separate row. Here, we see the filter results for slides #1, #2, and #3 displayed in a browser.
-
-| **Filters for Slides 1, 2, 3** |
-| -------------------- |
-|  |
-
-
-To apply all filters to all images in the training set using multiprocessing, we can utilize the
-`multiprocess_apply_filters_to_images()` function. Since there are 9 generated images per slide
-(8 of which are shown in the HTML summary) and 500 slides, this results in a total of 4,500 images
-and 4,500 thumbnails. Generating `png` images and `jpg` thumbnails, this takes about 24 minutes on
-my MacBook Pro.
-
-```
-filter.multiprocess_apply_filters_to_images()
-```
-
-If we display the `filters.html` file in a browser, we see that the filter results for the first 50 slides are
-displayed. By default, results are paginated at 50 slides per page. Pagination can be turned on and off using the
-`FILTER_PAGINATE` constant. The pagination size can be adjusted using the `FILTER_PAGINATION_SIZE` constant.
-
-One useful action we can take is to group similar slides into categories. For example,
-we can group slides into sets that have red, green, and blue pen marks on them.
-
-```
-red_pen_slides = [4, 15, 24, 48, 63, 67, 115, 117, 122, 130, 135, 165, 166, 185, 209, 237, 245, 249, 279, 281, 282, 289, 336, 349, 357, 380, 450, 482]
-green_pen_slides = [51, 74, 84, 86, 125, 180, 200, 337, 359, 360, 375, 382, 431]
-blue_pen_slides = [7, 28, 74, 107, 130, 140, 157, 174, 200, 221, 241, 318, 340, 355, 394, 410, 414, 457, 499]
-```
-
-We can run our filters on the list of red pen slides in the following manner:
-
-```
-filter.multiprocess_apply_filters_to_images(image_num_list=red_pen_slides)
-```
-
-In this way, we can make tweaks to specific filters or combinations of specific filters and see how these changes apply
-to the subset of relevant training images without requiring reprocessing of the entire training dataset.
-
-| **Red Pen Slides with Filter Results** |
-| -------------------- |
-|  |
-
-
-### Overmask Avoidance
-
-When developing filters and filter settings to perform tissue segmentation on the entire training
-set, we have to deal with a great amount of variation in the slide samples. To begin with, some slides have a large
-amount of tissue on them, while other slides only have a minimal amount of tissue. There is a great deal of
-variation in tissue staining. We also need to deal with additional issues such as pen marks and shadows on some of
-the slides.
-
-Slide #498 is an example of a slide with a large tissue sample. After filtering, the slide is 46% masked.
-
-| **Slide with Large Tissue Sample** | **Slide with Large Tissue Sample after Filtering** |
-| -- | -- |
-|  |  |
-
-
-Slide #127 is an example of a small tissue sample. After filtering, the slide is 93% masked. With such a small tissue
-sample to begin with, we need to be careful that our filters don't overmask this slide.
-
-| **Slide with Small Tissue Sample** | **Slide with Small Tissue Sample after Filtering** |
-| -- | -- |
-|  |  |
-
-
-Being aggressive in our filtering may generate excellent results for many of the slides but may
-result in overmasking of other slides, where the amount of non-tissue masking is too high. For example, if 99% of
-a slide is masked, it has been overmasked.
-
-Avoiding overmasking across the entire training dataset can be difficult. For example, suppose we have a slide that
-has only a proportionally small amount of tissue on it to start, say 10%. If this particular tissue sample has been
-poorly stained so that it is perhaps a light purplish grayish color, applying our grays or green channel filters might
-result in a significant portion of the tissue being masked out. This could also potentially result in small
-islands of non-masked tissue, and since we utilize a filter to remove small objects, this could result in the
-further masking out of additional tissue regions. In such a situation, masking of 95% to 100% of the slide is possible.
-
-Slide #424 has a small tissue sample and its staining is a very faint lavender color. Slide #424 is
-at risk for overmasking with our given combination of filters.
-
-| **Slide with Small Tissue Sample and Faint Staining** |
-| -- |
-|  |
-
-
-Therefore, rather than having fixed settings, we can automatically have our filters tweak parameter values to avoid
-overmasking if desired. As examples, the `filter_green_channel()` and `filter_remove_small_objects()` functions have
-this ability. If masking exceeds a certain overmasking threshold, a parameter value can be changed to lower
-the amount of masking until the masking is below the overmasking threshold.
-
-```
-filter.filter_green_channel(np_img, green_thresh=200, avoid_overmask=True, overmask_thresh=90, output_type="bool")
-filter.filter_remove_small_objects(np_img, min_size=3000, avoid_overmask=True, overmask_thresh=95, output_type="uint8")
-```
-
-For the `filter_green_channel()` function, if a `green_thresh` value of 200 results in masking over 90%, the
-function will try with a higher `green_thresh` value (228) and the masking level will be checked. This will continue
-until the masking doesn't exceed the overmask threshold of 90% or the threshold is 255.
-
-For the `filter_remove_small_objects()` function, if a `min_size` value of 3000 results in a masking level over 95%,
-the function will try with a lower `min_size` value (1500) and the masking level will be checked. These `min_size`
-reductions will continue until the masking level isn't over 95% or the minimum size is 0. For the image filtering
-specified in `apply_image_filters()`, a starting `min_size` value of 500 for `filter_remove_small_objects()` is used.
-
-Examining our full set of images using `multiprocess_apply_filters_to_images()`, we can identify slides that are
-at risk for overmasking. We can create a list of these slide numbers and use `multiprocess_apply_filters_to_images()`
-with this list of slide numbers to generate the `filters.html` page that allows us to visually inspect the filters
-applied to this set of slides.
-
-```
-overmasked_slides = [1, 21, 29, 37, 43, 88, 116, 126, 127, 142, 145, 173, 196, 220, 225, 234, 238, 284, 292, 294, 304,
- 316, 401, 403, 424, 448, 452, 472, 494]
-filter.multiprocess_apply_filters_to_images(image_num_list=overmasked_slides)
-```
-
-Let's have a look at how we reduce overmasking on slide 21, which is a slide that has very faint staining.
-
-| **Slide 21** |
-| -------------------- |
-|  |
-
-
-We'll run our filters on slide #21.
-
-```
-filter.singleprocess_apply_filters_to_images(image_num_list=[21])
-```
-
-If we set the `filter_green_channel()` and `filter_remove_small_objects()` `avoid_overmask` parameters to False,
-97.69% of the original image is masked by the "green channel" filter and 99.92% of the original image is
-masked by the subsequent "remove small objects" filter. This is significant overmasking.
-
-| **Overmasked by Green Channel Filter (97.69%)** | **Overmasked by Remove Small Objects Filter (99.92%)** |
-| -- | -- |
-| ") | ")
-
-If we set `avoid_overmask` to True for `filter_remove_small_objects()`, we see that the "remove small objects"
-filter does not perform any further masking since the 97.69% masking from the previous "green channel" filter
-already exceeds its overmasking threshold of 95%.
-
-| **Overmasked by Green Channel Filter (97.69%)** | **Avoid Overmask by Remove Small Objects Filter (97.69%)** |
-| -- | -- |
-| ") | ")
-
-
-If we set `avoid_overmask` back to False for `filter_remove_small_objects()` and we set `avoid_overmask` to True for
-`filter_green_channel()`, we see that 87.91% of the original image is masked by the "green channel" filter (under
-the 90% overmasking threshold for the filter) and 97.40% of the image is masked by the subsequent
-"remove small objects" filter.
-
-| **Avoid Overmask by Green Channel Filter (87.91%)** | **Overmask by Remove Small Objects Filter (97.40%)** |
-| -- | -- |
-| ") | ")
-
-
-If we set `avoid_overmask` to True for both `filter_green_channel()` and `filter_remove_small_objects()`, we see that
-the resulting masking after the "remove small objects" filter has been reduced to 94.88%, which is under its
-overmasking threshold of 95%.
-
-| **Avoid Overmask by Green Channel Filter (87.91%)** | **Avoid Overmask by Remove Small Objects Filter (94.88%)** |
-| -- | -- |
-| ") | ")
-
-
-Thus, in this example we've reduced the masking from 99.92% to 94.88%.
-
-We can see the filter adjustments being made in the console output.
-
-```
-Processing slide #21
-RGB | Time: 0:00:00.095414 Type: uint8 Shape: (1496, 1576, 3)
-Save Image | Time: 0:00:00.617039 Name: ../data/filter_png/TUPAC-TR-021-001-rgb.png
-Save Thumbnail | Time: 0:00:00.019557 Name: ../data/filter_thumbnail_jpg/TUPAC-TR-021-001-rgb.jpg
-Mask percentage 97.69% >= overmask threshold 90.00% for Remove Green Channel green_thresh=200, so try 228
-Filter Green Channel | Time: 0:00:00.005335 Type: bool Shape: (1496, 1576)
-Filter Green Channel | Time: 0:00:00.010499 Type: bool Shape: (1496, 1576)
-Mask RGB | Time: 0:00:00.009980 Type: uint8 Shape: (1496, 1576, 3)
-Save Image | Time: 0:00:00.322629 Name: ../data/filter_png/TUPAC-TR-021-002-rgb-not-green.png
-Save Thumbnail | Time: 0:00:00.018244 Name: ../data/filter_thumbnail_jpg/TUPAC-TR-021-002-rgb-not-green.jpg
-Filter Grays | Time: 0:00:00.072200 Type: bool Shape: (1496, 1576)
-Mask RGB | Time: 0:00:00.010461 Type: uint8 Shape: (1496, 1576, 3)
-Save Image | Time: 0:00:00.295995 Name: ../data/filter_png/TUPAC-TR-021-003-rgb-not-gray.png
-Save Thumbnail | Time: 0:00:00.017668 Name: ../data/filter_thumbnail_jpg/TUPAC-TR-021-003-rgb-not-gray.jpg
-Filter Red Pen | Time: 0:00:00.055296 Type: bool Shape: (1496, 1576)
-Mask RGB | Time: 0:00:00.008704 Type: uint8 Shape: (1496, 1576, 3)
-Save Image | Time: 0:00:00.595753 Name: ../data/filter_png/TUPAC-TR-021-004-rgb-no-red-pen.png
-Save Thumbnail | Time: 0:00:00.016758 Name: ../data/filter_thumbnail_jpg/TUPAC-TR-021-004-rgb-no-red-pen.jpg
-Filter Green Pen | Time: 0:00:00.088633 Type: bool Shape: (1496, 1576)
-Mask RGB | Time: 0:00:00.008860 Type: uint8 Shape: (1496, 1576, 3)
-Save Image | Time: 0:00:00.585474 Name: ../data/filter_png/TUPAC-TR-021-005-rgb-no-green-pen.png
-Save Thumbnail | Time: 0:00:00.016964 Name: ../data/filter_thumbnail_jpg/TUPAC-TR-021-005-rgb-no-green-pen.jpg
-Filter Blue Pen | Time: 0:00:00.069669 Type: bool Shape: (1496, 1576)
-Mask RGB | Time: 0:00:00.009665 Type: uint8 Shape: (1496, 1576, 3)
-Save Image | Time: 0:00:00.589634 Name: ../data/filter_png/TUPAC-TR-021-006-rgb-no-blue-pen.png
-Save Thumbnail | Time: 0:00:00.016736 Name: ../data/filter_thumbnail_jpg/TUPAC-TR-021-006-rgb-no-blue-pen.jpg
-Mask RGB | Time: 0:00:00.009115 Type: uint8 Shape: (1496, 1576, 3)
-Save Image | Time: 0:00:00.294103 Name: ../data/filter_png/TUPAC-TR-021-007-rgb-no-gray-no-green-no-pens.png
-Save Thumbnail | Time: 0:00:00.017540 Name: ../data/filter_thumbnail_jpg/TUPAC-TR-021-007-rgb-no-gray-no-green-no-pens.jpg
-Mask percentage 97.40% >= overmask threshold 95.00% for Remove Small Objs size 500, so try 250
-Mask percentage 96.83% >= overmask threshold 95.00% for Remove Small Objs size 250, so try 125
-Mask percentage 95.87% >= overmask threshold 95.00% for Remove Small Objs size 125, so try 62
-Remove Small Objs | Time: 0:00:00.031198 Type: bool Shape: (1496, 1576)
-Remove Small Objs | Time: 0:00:00.062300 Type: bool Shape: (1496, 1576)
-Remove Small Objs | Time: 0:00:00.095616 Type: bool Shape: (1496, 1576)
-Remove Small Objs | Time: 0:00:00.128008 Type: bool Shape: (1496, 1576)
-Mask RGB | Time: 0:00:00.007214 Type: uint8 Shape: (1496, 1576, 3)
-Save Image | Time: 0:00:00.235025 Name: ../data/filter_png/TUPAC-TR-021-008-rgb-not-green-not-gray-no-pens-remove-small.png
-Save Thumbnail | Time: 0:00:00.016905 Name: ../data/filter_thumbnail_jpg/TUPAC-TR-021-008-rgb-not-green-not-gray-no-pens-remove-small.jpg
-Save Image | Time: 0:00:00.232206 Name: ../data/filter_png/TUPAC-TR-021-32x-50432x47872-1576x1496-filtered.png
-Save Thumbnail | Time: 0:00:00.017285 Name: ../data/filter_thumbnail_jpg/TUPAC-TR-021-32x-50432x47872-1576x1496-filtered.jpg
-Slide #021 processing time: 0:00:04.596086
-
-```
-
-
-## Tiles
-
-Following our filtering, we should have fairly good tissue segmentation for our dataset,
-where non-tissue pixels have been masked out from our 1/32x scaled-down slide images. At this
-stage, we break our images into tile regions. Tiling code is located in the `wsi/tiles.py`
-file.
-
-For visualization, the tissue percentage of each tile is color-coded in a similar fashion
-to a heat map. Tiles with 80% or more tissue are green, tiles less than 80% tissue and greater
-or equal to 10% tissue are yellow, tiles less than 10% tissue and greater than 0% tissue are
-orange, and tiles with 0% tissue are red.
-
-The heat map threshold values can be adjusted by modifying the `TISSUE_HIGH_THRESH` and
-`TISSUE_LOW_THRESH` constants in `wsi/tiles.py`, which have default values of 80 and 10
-respectively. Heat map colors can be adjusted by modifying the `HIGH_COLOR`, `MEDIUM_COLOR`,
-`LOW_COLOR`, and `NONE_COLOR` constants. The heat map border size can be adjusted using the
-`TILE_BORDER_SIZE` constant, which has a default value of 2.
-Tile sizes are specified according to number of pixels in the original WSI files. The
-default `ROW_TILE_SIZE` and `COL_TILE_SIZE` values are 1,024 pixels.
-
-To generate and display tiles for a single slide, we utilize the `summary_and_tiles()` function,
-which generates tile summaries and returns the top scoring tiles for a slide. We discuss
-tile scoring in a later section.
-
-Let's generate tile tissue heat map summaries for slide #2 and display the summaries to the screen.
-
-```
-tiles.summary_and_tiles(2, display=True, save_summary=True, save_data=False, save_top_tiles=False)
-```
-
-Here, we see the tile tissue segmentation heat map summaries that are generated. The heat maps are
-displayed on the masked image and the original image to allow for comparison.
-
-| **Tissue Heat Map** | **Tissue Heat Map on Original** |
-| ------------------------ | ------------------------------------ |
-|  |  |
-
-We see a variety of slide statistics displayed on the tile summaries. We see that slide #2
-has dimensions of 57,922x44,329. After scaling down the slide width and height by 1/32x, we have a
-`png` image with dimensions 1,810x1,385. Breaking this image down into 32x32 tiles, we have 44 rows
-and 57 columns, making a total of 2,508 tiles. Using our tissue segmentation filtering algorithms,
-we have 1,283 tiles with high tissue percentages (>=80%), 397 tiles with medium tissue percentages
-(>=10% and <80%), 102 tiles with low tissue percentages (>0% and <10%), and 726 tiles with no tissue
-(0%).
-
-| Characteristic | Result |
-| ------------------- | ------------- |
-| Original Dimensions | 57,922x44,329 |
-| Original Tile Size | 1,024x1,024 |
-| Scale Factor | 1/32x |
-| Scaled Dimensions | 1,810x1,385 |
-| Scaled Tile Size | 32x32 |
-| Total Mask | 41.60% |
-| Total Tissue | 58.40% |
-| Tiles | 57x44 = 2,508 |
-| | 1,283 (51.16%) tiles >=80% tissue |
-| | 397 (15.83%) tiles >=10% and <80% tissue |
-| | 102 ( 4.07%) tiles >0% and <10% tissue |
-| | 726 (28.95%) tiles =0% tissue |
-
-
-Often it can be useful to know the exact row and column of a particular tile or tiles. If the
-`DISPLAY_TILE_SUMMARY_LABELS` constant is set to True, the row and column of each tile is
-output on the tile summaries. Generating the tile labels is fairly time-consuming, so usually
-`DISPLAY_TILE_SUMMARY_LABELS` should be set to False for performance.
-
-| **Optional Tile Labels** |
-| -------------------- |
-|  |
-
-
-## Tile Scoring
-
-In order to selectively choose how "good" a tile is compared to other tiles, we assign scores to
-tiles based on tissue percentage and color characteristics. To determine the "best" tiles, we
-sort based on score and return the top scoring tiles. We generate top tile summaries based on the
-top scoring tiles, in a similar fashion as the tissue percentage summaries.
-
-The `score_tile()` function assigns a score to a tile based on the tissue percentage and various
-color characteristics of the tile. The scoring formula utilized by `score_tile()` can be summarized
-as follows.
-
-| **Scoring Formula** |
-| -------------------- |
-|  |
-
-The scoring formula generates good results for the images in the dataset and was developed through
-experimentation with the training dataset. The *tissuepercent* is emphasized by squaring its value.
-The *colorfactor* value is used to weigh hematoxylin staining heavier than eosin staining. Utilizing
-the HSV color model, broad saturation and value distributions are given more weight by the
-*saturationvaluefactor*. The *quantityfactor* value utilizes the tissue percentage to give more weight
-to tiles with more tissue. Note that if *colorfactor*, *saturationvaluefactor*, or
-*quantityfactor* evaluate to 0, the *score* will be 0. The *score* is scaled to a value from
-0.0 to 1.0.
-
-During our discussion of color staining, we mentioned that tissue with hematoxylin staining is most
-likely preferable to eosin staining. Hematoxylin stains acidic structures such as DNA and RNA with
-a purple tone, while eosin stains basic structures such as cytoplasm proteins with a pink tone.
-Let's discuss how we can more heavily score tiles with hematoxylin staining over eosin staining.
-
-Differentiating purplish shades from pinkish shades can be difficult using the RGB color space
-(see [https://en.wikipedia.org/wiki/RGB_color_space](https://en.wikipedia.org/wiki/RGB_color_space)).
-Therefore, to compute our *colorfactor* value, we first convert our tile in RGB color space
-to HSV color space (see [https://en.wikipedia.org/wiki/HSL_and_HSV](https://en.wikipedia.org/wiki/HSL_and_HSV)).
-HSV stands for Hue-Saturation-Value. In this color model, the hue is represented as a degree value
-on a circle. Purple has a hue of 270 degrees and pink has a hue of 330
-degrees. We remove all hues less than 260 and greater than 340. Next, we compute the deviation from
-purple (270) and the deviation from pink (330). We compute an average factor which is the squared
-difference of 340 and the hue average. The *colorfactor* is computed as the pink deviation times
-the average factor divided by the purple deviation.
-
-Let's have a closer look at a 32x32 tile and its accompanying HSV hue histogram. Note that in order
-to properly convert a matplotlib chart image (the histogram) to a NumPy image on macOS, we currently
-need to include a call to `matplotlib.use('Agg')`.
-One way we can obtain a particular tile for analysis is to call
-the `dynamic_tile()` function, which we describe in more detail later. Here, we obtain
-the tile at the 29th row and 16th column on slide #2. Setting the `small_tile_in_tile` parameter
-to `True` means that the scaled-down 32x32 tile is included in the returned Tile object.
-The `display_image_with_hsv_hue_histogram()` function is used to display the small tile and its hue
-histogram.
-
-```
-# To get around renderer issue on macOS going from Matplotlib image to NumPy image.
-import matplotlib
-matplotlib.use('Agg')
-from deephistopath.wsi import tiles
-
-tile = tiles.dynamic_tile(2, 29, 16, True)
-tiles.display_image_with_hsv_hue_histogram(tile.get_np_scaled_tile(), scale_up=True)
-```
-
-Here we see the 32x32 slide with its accompanying hue histogram. For convenience, colors have
-been added to the histogram.
-Also, notice that the non-tissue masked-out pixels have a peak at 0 degrees.
-
-| **Tile HSV Hue Histogram** |
-| -------------------- |
-|  |
-
-
-For convenience, the `Tile` class has a `display_with_histograms()` function that can be used
-to display histograms for both the RGB and HSV color spaces. If the scaled-down small tile is
-included in the Tile object (using the `dynamic_tile()` `small_tile_in_tile` parameter with a
-value of `True`), histograms will be displayed for both the small tile and the large tile.
-
-```
-import matplotlib
-matplotlib.use('Agg')
-from deephistopath.wsi import tiles
-
-tile = tiles.dynamic_tile(2, 29, 16, True)
-tile.display_with_histograms();
-```
-
-Here we see RGB and HSV histograms for the scaled-down tile at slide 2, row 29, column 16. We
-see its score and tissue percentage. This tile's score was ranked 734 out of
-a total of 2,508 tiles on this slide.
-
-| **Small Tile Color Histograms** |
-| -------------------- |
-|  |
-
-
-Here we see RGB and HSV histograms for the full-sized 1,024x1,024 tile at slide 2, row 29,
-column 16. Notice that the small tile pixels offer a reasonable approximation of the colors
-present on the large tile. Also, notice that the masked-out pixels in the small tissue
-correspond fairly accurately with the non-tissue regions of the large tile.
-
-| **Large Tile Color Histograms** |
-| -------------------- |
-|  |
-
-
-If the `save_data` parameter of the `summary_and_tiles()` function is set to `True`, detailed data about
-the slide tiles are saved in `csv` format.
-
-```
-tiles.summary_and_tiles(2, display=True, save_summary=True, save_data=True, save_top_tiles=False)
-```
-
-For slide #2, this generates a `TUPAC-TR-002-32x-57922x44329-1810x1385-tile_data.csv` file.
-
-| **Tile Data** |
-| ------------- |
-|  |
-
-
-In addition to the tile tissue heat map summaries, the `summary_and_tiles()` function generates
-top tile summaries. By default it highlights the top 50 scoring tiles. The number of top tiles can be
-controlled by the `NUM_TOP_TILES` constant.
-
-```
-tiles.summary_and_tiles(2, display=True, save_summary=True, save_data=False, save_top_tiles=False)
-```
-
-Here we see the top tile summary on the masked image for slide #2. Notice that tiles with high
-tissue percentages and hematoxylin-stained tissue are favored over tiles with low tissue
-percentages and eosin-stained tissue. Notice that statistics about the top 50 scoring tiles are
-displayed to the right of the image.
-
-| **Top Tiles** |
-| ------------- |
-|  |
-
-
-For visual inspection, the top tile summary is also generated over the original slide image, as
-we see here.
-
-| **Top Tiles on Original** |
-| ------------------------- |
-|  |
-
-
-When analyzing top tile results, it can be useful to see the tissue percentage heat map
-of surrounding tiles. This can be accomplished by setting the `BORDER_ALL_TILES_IN_TOP_TILE_SUMMARY`
-constant to `True`. Likewise, it can be useful to see the row and column coordinates of all tiles,
-which can be accomplished using the `LABEL_ALL_TILES_IN_TOP_TILE_SUMMARY` constant with a value of
-`True`.
-
-| **Top Tile Borders** | **Top Tile Labels** |
-| -------------------- | -------------------- |
-|  |  |
-
-
-Here we see a section of a top tile summary that features both the tile tissue heat map and the
-row and column labels.
-
-| **Top Tile Labels and Borders** |
-| ------------------------- |
-|  |
-
-## Top Tile Retrieval
-
-Top tiles can be saved as files in batch mode or retrieved dynamically. In batch mode,
-tiling, scoring, and saving the 1,000 tissue percentage heat map summaries (2 per image),
-the 1,000 top tile summaries (2 per image), the 2,000 thumbnails, and 25,000 1Kx1K tiles
-(50 per image) takes approximately 2 hours.
-
-If the `save_top_tiles` parameter of the `summary_and_tiles()` function is set to `True`,
-the top-ranking tiles for the specified slide will be saved to the file system.
-
-```
-tiles.summary_and_tiles(2, display=True, save_summary=True, save_data=False, save_top_tiles=True)
-```
-
-In general, it is recommended that the user utilize the `singleprocess_filtered_images_to_tiles()`
-and `multiprocess_filtered_images_to_tiles()` functions in `wsi/tiles.py`. These functions
-generate convenient HTML pages for investigating the tiles generated for a slide set. The
-`multiprocess_filtered_images_to_tiles()` utilizes multiprocessing for added performance. If
-no `image_num_list` parameter is provided, all images in the dataset will be processed.
-
-Here, we generate the top 50 tiles for slides #1, #2, and #3.
-
-```
-tiles.multiprocess_filtered_images_to_tiles(image_num_list=[1, 2, 3])
-```
-
-On the generated `tiles.html` page, we see the original slide images, the images after filtering,
-the tissue percentage heat map summaries on the filtered images and the original images, tile summary
-data including links to the generated `csv` file for each slide, the top tile summaries on the
-filtered images and the original images, and links to the top 50 tile files for each slide.
-
-| **Tiles Page** |
-| ------------- |
-|  |
-
-
-The full-size 1,024x1,024 tiles can be investigated using the top tile links. Here we see the
-two top-scoring tiles on slide 2 at row 34, column 34 and row 35, column 37.
-
-| **Slide #1, Top Tile #1** | **Slide #1, Top Tile #2** |
-| ------------------------ | ------------------------------------ |
-|  |  |
-
-
-Tiles can also be retrieved dynamically. In dynamic tile retrieval, slides are scaled down,
-filtered, tiled, and scored all in-memory. The top tiles can then be retrieved from the
-original WSI file and stored in-memory. No intermediate files are written to the file system
-during dynamic tile retrieval.
-
-Here, we dynamically obtain a `TileSummary` object by calling `dynamic_tiles()` for
-slide #2. We obtain the top-scoring tiles from `tile_summary`, outputting status
-information about each tile. The status information includes the tile number, the row
-number, the column number, the tissue percentage, and the tile score.
-
-```
-tile_summary = tiles.dynamic_tiles(2)
-top_tiles = tile_summary.top_tiles()
-for t in top_tiles:
- print(t)
-```
-
-In the console output, we see that the original `svs` file is opened, the slide is
-scaled down, and our series of filters is run on the scaled-down image. After that,
-the tiles are scored, and we see status information about the top 50 tiles for
-the slide.
-
-```
-Opening Slide #2: ../data/training_slides/TUPAC-TR-002.svs
-RGB | Time: 0:00:00.007339 Type: uint8 Shape: (1385, 1810, 3)
-Filter Green Channel | Time: 0:00:00.005135 Type: bool Shape: (1385, 1810)
-Mask RGB | Time: 0:00:00.007973 Type: uint8 Shape: (1385, 1810, 3)
-Filter Grays | Time: 0:00:00.073780 Type: bool Shape: (1385, 1810)
-Mask RGB | Time: 0:00:00.008114 Type: uint8 Shape: (1385, 1810, 3)
-Filter Red Pen | Time: 0:00:00.066007 Type: bool Shape: (1385, 1810)
-Mask RGB | Time: 0:00:00.007925 Type: uint8 Shape: (1385, 1810, 3)
-Filter Green Pen | Time: 0:00:00.105854 Type: bool Shape: (1385, 1810)
-Mask RGB | Time: 0:00:00.008034 Type: uint8 Shape: (1385, 1810, 3)
-Filter Blue Pen | Time: 0:00:00.087092 Type: bool Shape: (1385, 1810)
-Mask RGB | Time: 0:00:00.007963 Type: uint8 Shape: (1385, 1810, 3)
-Mask RGB | Time: 0:00:00.007807 Type: uint8 Shape: (1385, 1810, 3)
-Remove Small Objs | Time: 0:00:00.034308 Type: bool Shape: (1385, 1810)
-Mask RGB | Time: 0:00:00.007814 Type: uint8 Shape: (1385, 1810, 3)
-[Tile #1915, Row #34, Column #34, Tissue 100.00%, Score 0.8824]
-[Tile #1975, Row #35, Column #37, Tissue 100.00%, Score 0.8816]
-[Tile #1974, Row #35, Column #36, Tissue 99.90%, Score 0.8811]
-[Tile #500, Row #9, Column #44, Tissue 99.32%, Score 0.8797]
-[Tile #814, Row #15, Column #16, Tissue 99.22%, Score 0.8795]
-[Tile #1916, Row #34, Column #35, Tissue 100.00%, Score 0.8789]
-[Tile #1956, Row #35, Column #18, Tissue 99.51%, Score 0.8784]
-[Tile #1667, Row #30, Column #14, Tissue 98.63%, Score 0.8783]
-[Tile #1839, Row #33, Column #15, Tissue 99.51%, Score 0.8782]
-[Tile #1725, Row #31, Column #15, Tissue 99.61%, Score 0.8781]
-[Tile #2061, Row #37, Column #9, Tissue 98.54%, Score 0.8779]
-[Tile #724, Row #13, Column #40, Tissue 99.90%, Score 0.8778]
-[Tile #1840, Row #33, Column #16, Tissue 99.22%, Score 0.8777]
-[Tile #758, Row #14, Column #17, Tissue 99.41%, Score 0.8775]
-[Tile #1722, Row #31, Column #12, Tissue 98.24%, Score 0.8771]
-[Tile #722, Row #13, Column #38, Tissue 99.51%, Score 0.8769]
-[Tile #1803, Row #32, Column #36, Tissue 99.22%, Score 0.8769]
-[Tile #446, Row #8, Column #47, Tissue 100.00%, Score 0.8768]
-[Tile #988, Row #18, Column #19, Tissue 99.61%, Score 0.8767]
-[Tile #2135, Row #38, Column #26, Tissue 99.80%, Score 0.8767]
-[Tile #704, Row #13, Column #20, Tissue 99.61%, Score 0.8767]
-[Tile #816, Row #15, Column #18, Tissue 99.41%, Score 0.8766]
-[Tile #1180, Row #21, Column #40, Tissue 99.90%, Score 0.8765]
-[Tile #1178, Row #21, Column #38, Tissue 99.80%, Score 0.8765]
-[Tile #1042, Row #19, Column #16, Tissue 99.71%, Score 0.8764]
-[Tile #1783, Row #32, Column #16, Tissue 99.80%, Score 0.8764]
-[Tile #1978, Row #35, Column #40, Tissue 100.00%, Score 0.8763]
-[Tile #832, Row #15, Column #34, Tissue 99.61%, Score 0.8762]
-[Tile #1901, Row #34, Column #20, Tissue 99.90%, Score 0.8759]
-[Tile #701, Row #13, Column #17, Tissue 99.80%, Score 0.8758]
-[Tile #817, Row #15, Column #19, Tissue 99.32%, Score 0.8757]
-[Tile #2023, Row #36, Column #28, Tissue 100.00%, Score 0.8754]
-[Tile #775, Row #14, Column #34, Tissue 99.51%, Score 0.8754]
-[Tile #1592, Row #28, Column #53, Tissue 100.00%, Score 0.8753]
-[Tile #702, Row #13, Column #18, Tissue 99.22%, Score 0.8753]
-[Tile #759, Row #14, Column #18, Tissue 99.51%, Score 0.8752]
-[Tile #1117, Row #20, Column #34, Tissue 99.90%, Score 0.8751]
-[Tile #1907, Row #34, Column #26, Tissue 99.32%, Score 0.8750]
-[Tile #1781, Row #32, Column #14, Tissue 99.61%, Score 0.8749]
-[Tile #2250, Row #40, Column #27, Tissue 99.61%, Score 0.8749]
-[Tile #1902, Row #34, Column #21, Tissue 99.90%, Score 0.8749]
-[Tile #2014, Row #36, Column #19, Tissue 99.22%, Score 0.8749]
-[Tile #2013, Row #36, Column #18, Tissue 99.51%, Score 0.8747]
-[Tile #1175, Row #21, Column #35, Tissue 99.71%, Score 0.8746]
-[Tile #760, Row #14, Column #19, Tissue 99.22%, Score 0.8746]
-[Tile #779, Row #14, Column #38, Tissue 99.32%, Score 0.8745]
-[Tile #1863, Row #33, Column #39, Tissue 99.71%, Score 0.8745]
-[Tile #1899, Row #34, Column #18, Tissue 99.51%, Score 0.8745]
-[Tile #778, Row #14, Column #37, Tissue 99.90%, Score 0.8743]
-[Tile #1724, Row #31, Column #14, Tissue 99.51%, Score 0.8741]
-```
-
-If we'd like to obtain each tile as a NumPy array, we can do
-so by calling the `get_np_tile()` function on the `Tile`
-object.
-
-```
-tile_summary = tiles.dynamic_tiles(2)
-top_tiles = tile_summary.top_tiles()
-for t in top_tiles:
- print(t)
- np_tile = t.get_np_tile()
-```
-
-As a further example, here we dynamically retrieve the tiles
-for slide #4 and display the top 2 tiles along with their
-RGB and HSV histograms.
-
-```
-tile_summary = tiles.dynamic_tiles(4)
-top = tile_summary.top_tiles()[:2]
-for t in top:
- t.display_with_histograms()
-```
-
-| **Slide #4, Top Tile #1** | **Slide #4, Top Tile #2** |
-| ------------------------ | ------------------------------------ |
-|  |  |
-
-
-Next, we dynamically retrieve the tiles for slide #2. We
-display (not shown) the tile tissue heat map and top tile summaries and
-then obtain the tiles ordered by tissue percentage.
-We display the 1,000th and 1,500th tiles by tissue percentage.
-
-```
-tile_summary = tiles.dynamic_tiles(2)
-tile_summary.display_summaries()
-ts = tile_summary.tiles_by_tissue_percentage()
-ts[999].display_with_histograms()
-ts[1499].display_with_histograms()
-```
-
-Here we see the 1,000th and 1,500th tiles ordered by tissue percentage for slide #2.
-Note that the displayed tile rank information is based on score rather than
-tissue percentage alone.
-
-| **Slide #2, Tissue Percentage #1000** | **Slide #2, Tissue Percentage #1500** |
-| ------------------------ | ------------------------------------ |
-|  |  |
-
-
-Tiles can be retrieved based on position. Here, we display the tiles at row 25, column 30 and row 25, column 31 on slide #2.
-
-```
-tile_summary = tiles.dynamic_tiles(2)
-tile_summary.get_tile(25, 30).display_tile()
-tile_summary.get_tile(25, 31).display_tile()
-```
-
-| **Slide #2, Row #25, Column #30** | **Slide #2, Row #25, Column #31** |
-| ------------------------ | ------------------------------------ |
-|  |  |
-
-If an individual tile is required, the `dynamic_tile()` function can be used.
-
-```
-tiles.dynamic_tile(2, 25, 32).display_tile()
-```
-
-| **Slide #2, Row #25, Column #32** |
-| --------------------------------- |
-|  |
-
-If multiple tiles need to be retrieved dynamically, for performance reasons `dynamic_tiles()` is
-preferable to `dynamic_tile()`.
-
-
-## Summary
-
-In this tutorial, we've taken a look at how Python, in particular with packages such as NumPy and
-scikit-image, can be used for tissue segmentation in whole-slide images. In order to efficiently process
-images in our dataset, we utilized OpenSlide to scale down the slides. Using NumPy arrays, we
-investigated a wide variety of image filters and settled on a combination and series of filters that
-demonstrated fast, acceptably accurate tissue segmentation for our dataset. Following this, we divided
-the filtered images into tiles and scored the tiles based on tissue percentage and color characteristics
-such as the degree of hematoxylin staining versus eosin staining. We then demonstrated how we can
-retrieve the top-scoring tiles which have high tissue percentages and preferred staining characteristics.
-We saw how whole-slide images could be processed in batches or dynamically. Scaling, filtering,
-tiling, scoring, and saving the top tiles can be accomplished in batch mode using multiprocessing in
-the following manner.
-
-```
-slide.multiprocess_training_slides_to_images()
-filter.multiprocess_apply_filters_to_images()
-tiles.multiprocess_filtered_images_to_tiles()
-```
-
-The above code generates HTML filter and tile pages which simplify visual
-inspection of the image processing and the final tile results.
-
-Since the average number of pixels per whole-slide image is 7,670,709,629 and we have reduced
-the data to the top 50 1,024x1,024 pixel tiles, we have reduced the raw image data down by a
-factor of 146x while identifying tiles that have significant potential for further useful
-analysis.
diff --git a/example.ipynb b/example.ipynb
new file mode 100755
index 0000000..91b6fe5
--- /dev/null
+++ b/example.ipynb
@@ -0,0 +1,227 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "%reload_ext autoreload\n",
+ "%autoreload 2\n",
+ "%matplotlib inline\n",
+ "#%matplotlib notebook\n",
+ "import numpy as np\n",
+ "from tqdm import tqdm_notebook as tqdm\n",
+ "import pathlib\n",
+ "from pathlib import Path\n",
+ "Path.ls = lambda x: [p for p in list(x.iterdir()) if '.ipynb_checkpoints' not in p.name]\n",
+ "import pandas as pd\n",
+ "import PIL\n",
+ "from wsi import slide, filter, tiles, util\n",
+ "\n",
+ "\n",
+ "base_path = Path('/home/Deep_Learner/shared/Datasets/Hypophysenadenome/')\n",
+ "wsis_path = base_path/'wsis_experimenting'\n",
+ "rois_path = base_path/'rois_experimenting'\n",
+ "tiles_path = base_path/'tiles_experimenting'\n",
+ "wsis_path.mkdir(exist_ok=True)\n",
+ "tiles_path.mkdir(exist_ok=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# WSIs"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Process one WSI"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "p = wsis_path.ls()[0];p\n",
+ "df = tiles.WsiOrROIToTiles(wsiPath=p, \n",
+ " tilesFolderPath=tiles_path, \n",
+ " tile_height=256,tile_width=256, \n",
+ " tile_naming_func=tiles.get_wsi_name_from_path_pituitary_adenoma_entities,\n",
+ " save_tiles=False, \n",
+ " tile_score_thresh = 0.55)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Process multiple WSIs in parallel"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "df = tiles.WsiOrROIToTilesMultithreaded(wsis_path.ls()[:2], \n",
+ " tiles_path, \n",
+ " 256, \n",
+ " 256, \n",
+ " tile_naming_func=tiles.get_wsi_name_from_path_pituitary_adenoma_entities, \n",
+ " save_tiles=False)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Extract one tile directly from a WSI at a specific level"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "p = wsis_path.ls()[0];p"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "s = slide.open_slide(p)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "s.level_dimensions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tiles.ExtractTileFromWSI(p, x=0,y=0, width=1232,height=840,level=5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# ROIs"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Process one ROI"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "p = rois_path.ls()[0];p\n",
+ "\n",
+ "tiles.WsiOrROIToTiles(wsiPath=p, \n",
+ " tilesFolderPath=tiles_path, \n",
+ " tile_height=1024,\n",
+ " tile_width=1024, \n",
+ " is_wsi=False, \n",
+ " tile_naming_func=tiles.get_roi_name_from_path_pituitary_adenoma_entities, \n",
+ " save_tiles=True)\n",
+ "\n",
+ "util.show_multiple_images_big(tiles_path.ls()[:2])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Extract one tile directly from a ROI"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "p = rois_path.ls()[0];p\n",
+ "\n",
+ "img = PIL.Image.open(p)\n",
+ "\n",
+ "print(img.size)\n",
+ "\n",
+ "tiles.ExtractTileFromPILImage(p, 1000, 1000, 512, 512)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "DLM Py3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ },
+ "toc": {
+ "base_numbering": 1,
+ "nav_menu": {},
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "title_cell": "Table of Contents",
+ "title_sidebar": "Contents",
+ "toc_cell": false,
+ "toc_position": {
+ "height": "calc(100% - 180px)",
+ "left": "10px",
+ "top": "150px",
+ "width": "258px"
+ },
+ "toc_section_display": true,
+ "toc_window_display": true
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/deephistopath/wsi/filter.py b/wsi/filter.py
old mode 100644
new mode 100755
similarity index 98%
rename from deephistopath/wsi/filter.py
rename to wsi/filter.py
index 1eaf388..7b9b480
--- a/deephistopath/wsi/filter.py
+++ b/wsi/filter.py
@@ -14,6 +14,8 @@
#
# ------------------------------------------------------------------------
+from pathlib import Path
+import PIL
import math
import multiprocessing
import numpy as np
@@ -27,9 +29,23 @@
import skimage.morphology as sk_morphology
import skimage.segmentation as sk_segmentation
-from deephistopath.wsi import slide
-from deephistopath.wsi import util
-from deephistopath.wsi.util import Time
+from wsi import slide, util, tiles
+from wsi.util import Time
+
+
+
+
+def filter_img(img_pil:PIL.Image.Image) -> PIL.Image.Image:
+ """
+    Mask non-tissue regions of an RGB PIL image: convert to grayscale, complement, Otsu-threshold, then apply the resulting mask and return the filtered PIL image.
+ """
+ img_np = util.pil_to_np_rgb(img_pil)
+ grayscale_np = filter_rgb_to_grayscale(img_np)
+ complement_np = filter_complement(grayscale_np)
+ otsu_np = filter_otsu_threshold(complement_np).astype(np.bool)
+ filtered_img_np = util.mask_rgb(img_np, otsu_np)
+ return util.np_to_pil(filtered_img_np)
+
def filter_rgb_to_grayscale(np_img, output_type="uint8"):
@@ -45,12 +61,12 @@ def filter_rgb_to_grayscale(np_img, output_type="uint8"):
Returns:
Grayscale image as NumPy array with shape (h, w).
"""
- t = Time()
+ #t = Time()
# Another common RGB ratio possibility: [0.299, 0.587, 0.114]
grayscale = np.dot(np_img[..., :3], [0.2125, 0.7154, 0.0721])
if output_type != "float":
grayscale = grayscale.astype("uint8")
- util.np_info(grayscale, "Gray", t.elapsed())
+ #util.np_info(grayscale, "Gray", t.elapsed())
return grayscale
@@ -65,12 +81,12 @@ def filter_complement(np_img, output_type="uint8"):
Returns:
Complement image as Numpy array.
"""
- t = Time()
+ #t = Time()
if output_type == "float":
complement = 1.0 - np_img
else:
complement = 255 - np_img
- util.np_info(complement, "Complement", t.elapsed())
+ #util.np_info(complement, "Complement", t.elapsed())
return complement
@@ -110,7 +126,7 @@ def filter_otsu_threshold(np_img, output_type="uint8"):
Returns:
NumPy array (bool, float, or uint8) where True, 1.0, and 255 represent a pixel above Otsu threshold.
"""
- t = Time()
+ #t = Time()
otsu_thresh_value = sk_filters.threshold_otsu(np_img)
otsu = (np_img > otsu_thresh_value)
if output_type == "bool":
@@ -119,7 +135,7 @@ def filter_otsu_threshold(np_img, output_type="uint8"):
otsu = otsu.astype(float)
else:
otsu = otsu.astype("uint8") * 255
- util.np_info(otsu, "Otsu Threshold", t.elapsed())
+ #util.np_info(otsu, "Otsu Threshold", t.elapsed())
return otsu
diff --git a/wsi/openslide_overwrite.py b/wsi/openslide_overwrite.py
new file mode 100755
index 0000000..bed26be
--- /dev/null
+++ b/wsi/openslide_overwrite.py
@@ -0,0 +1,47 @@
+import sys
+import PIL
+
+def _load_image(buf, size):
+ '''buf must be a buffer.'''
+
+ # Load entire buffer at once if possible
+ MAX_PIXELS_PER_LOAD = (1 << 29) - 1
+ # Otherwise, use chunks smaller than the maximum to reduce memory
+ # requirements
+ PIXELS_PER_LOAD = 1 << 26
+
+ def do_load(buf, size):
+ '''buf can be a string, but should be a ctypes buffer to avoid an
+ extra copy in the caller.'''
+ # First reorder the bytes in a pixel from native-endian aRGB to
+ # big-endian RGBa to work around limitations in RGBa loader
+ rawmode = (sys.byteorder == 'little') and 'BGRA' or 'ARGB'
+ buf = PIL.Image.frombuffer('RGBA', size, buf, 'raw', rawmode, 0, 1)
+ # Image.tobytes() is named tostring() in Pillow 1.x and PIL
+ buf = (getattr(buf, 'tobytes', None) or buf.tostring)()
+ # Now load the image as RGBA, undoing premultiplication
+ return PIL.Image.frombuffer('RGBA', size, buf, 'raw', 'RGBa', 0, 1)
+
+ # Fast path for small buffers
+ w, h = size
+ if w * h <= MAX_PIXELS_PER_LOAD:
+ return do_load(buf, size)
+
+ # Load in chunks to avoid OverflowError in PIL.Image.frombuffer()
+ # https://github.com/python-pillow/Pillow/issues/1475
+ if w > PIXELS_PER_LOAD:
+ # We could support this, but it seems like overkill
+ raise ValueError('Width %d is too large (maximum %d)' %
+ (w, PIXELS_PER_LOAD))
+ rows_per_load = PIXELS_PER_LOAD // w
+ img = PIL.Image.new('RGBA', (w, h))
+ for y in range(0, h, rows_per_load):
+ rows = min(h - y, rows_per_load)
+ if sys.version[0] == '2':
+ chunk = buffer(buf, 4 * y * w, 4 * rows * w)
+ else:
+ # PIL.Image.frombuffer() won't take a memoryview or
+ # bytearray, so we can't avoid copying
+ chunk = memoryview(buf)[y * w:(y + rows) * w].tobytes()
+ img.paste(do_load(chunk, (w, rows)), (0, y))
+ return img
diff --git a/wsi/slide.py b/wsi/slide.py
new file mode 100755
index 0000000..b802c51
--- /dev/null
+++ b/wsi/slide.py
@@ -0,0 +1,114 @@
+# ------------------------------------------------------------------------
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ------------------------------------------------------------------------
+
+import pathlib
+from pathlib import Path
+import glob
+import math
+import matplotlib.pyplot as plt
+import multiprocessing
+import numpy as np
+import openslide
+from openslide import OpenSlideError
+import os
+import PIL
+from PIL import Image
+import re
+import sys
+from wsi import util, tiles
+from wsi.util import Time
+from typing import List, Callable, Union
+
+
+def open_slide(path:Union[str, pathlib.Path]):
+ """
+  Open a whole-slide image (*.svs, *.ndpi, etc.).
+
+ Args:
+ path: Path to the slide file.
+
+ Returns:
+ An OpenSlide object representing a whole-slide image.
+ """
+ #try:
+ slide = openslide.open_slide(str(path))
+ #except OpenSlideError:
+ # slide = None
+ #except FileNotFoundError:
+ # slide = None
+ return slide
+
+
+def open_image(filename):
+ """
+ Open an image (*.jpg, *.png, etc).
+
+ Args:
+ filename: Name of the image file.
+
+  Returns:
+ A PIL.Image.Image object representing an image.
+ """
+ image = Image.open(filename)
+ return image
+
+
+def open_image_np(filename):
+ """
+ Open an image (*.jpg, *.png, etc) as an RGB NumPy array.
+
+ Args:
+ filename: Name of the image file.
+
+  Returns:
+    A NumPy array representing an RGB image.
+ """
+ pil_img = open_image(filename)
+ np_img = util.pil_to_np_rgb(pil_img)
+ return np_img
+
+
+def small_to_large_mapping(small_pixel, large_dimensions, scale_factor):
+ """
+ Map a scaled-down pixel width and height to the corresponding pixel of the original whole-slide image.
+
+ Args:
+ small_pixel: The scaled-down width and height.
+ large_dimensions: The width and height of the original whole-slide image.
+
+ Returns:
+ Tuple consisting of the scaled-up width and height.
+ """
+ small_x, small_y = small_pixel
+ large_w, large_h = large_dimensions
+ large_x = round((large_w / scale_factor) / math.floor(large_w / scale_factor) * (scale_factor * small_x))
+ large_y = round((large_h / scale_factor) / math.floor(large_h / scale_factor) * (scale_factor * small_y))
+ return large_x, large_y
+
+
+def get_conversion_factor(wsi_path:pathlib.Path, level:int)->float:
+ """
+ Arguments:
+ wsi_path: path to a whole-slide image
+ level: level of the whole-slide image, 0 means highest resolution, with every level the resolution halves
+ Result:
+ returns a conversion factor, to convert pixel size into micrometer
+ """
+ sl = open_slide(wsi_path)
+ mpp_x = float(sl.properties.get('openslide.mpp-x'))
+ mpp_y = float(sl.properties.get('openslide.mpp-y'))
+ assert mpp_x==mpp_y
+ return mpp_x*2**level
\ No newline at end of file
diff --git a/wsi/tiles.py b/wsi/tiles.py
new file mode 100755
index 0000000..0c855b6
--- /dev/null
+++ b/wsi/tiles.py
@@ -0,0 +1,1219 @@
+# ------------------------------------------------------------------------
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ------------------------------------------------------------------------
+
+# To get around renderer issue on macOS going from Matplotlib image to NumPy image.
+import matplotlib
+
+matplotlib.use('Agg')
+
+import PIL
+import pathlib
+from pathlib import Path
+import colorsys
+import math
+import matplotlib.pyplot as plt
+import multiprocessing
+import numpy
+import numpy as np
+import os
+import PIL
+from PIL import Image, ImageDraw, ImageFont
+from enum import Enum
+from wsi import util, filter, slide
+from wsi import openslide_overwrite
+from wsi.util import Time
+import openslide
+import multiprocessing
+from typing import List, Callable, Union, Dict, Tuple, Union
+from tqdm import tqdm_notebook as tqdm
+import pandas
+import pandas as pd
+import warnings
+
+
+
+TISSUE_HIGH_THRESH = 80
+TISSUE_LOW_THRESH = 10
+HSV_PURPLE = 270
+HSV_PINK = 330
+
+############################# classes #########################################
+
+
+
+class TileSummary:
+ """
+ Class for tile summary information.
+ """
+
+ wsi_path = None
+ is_wsi = None
+ tiles_folder_path = None
+ orig_w = None
+ orig_h = None
+ orig_tile_w = None
+ orig_tile_h = None
+ scale_factor = None
+ scaled_w = None
+ scaled_h = None
+ scaled_tile_w = None
+ scaled_tile_h = None
+ mask_percentage = None
+ num_row_tiles = None
+ num_col_tiles = None
+ tile_score_thresh = None
+ level = None
+ best_level_for_downsample = None
+ real_scale_factor = None
+
+ count = 0
+ high = 0
+ medium = 0
+ low = 0
+ none = 0
+
+ def __init__(self,
+ wsi_path,
+ is_wsi,
+ tiles_folder_path,
+ orig_w,
+ orig_h,
+ orig_tile_w,
+ orig_tile_h,
+ scale_factor,
+ scaled_w,
+ scaled_h,
+ scaled_tile_w,
+ scaled_tile_h,
+ tissue_percentage,
+ num_col_tiles,
+ num_row_tiles,
+ tile_score_thresh,
+ level,
+ best_level_for_downsample,
+ real_scale_factor):
+ self.wsi_path = wsi_path
+ self.is_wsi = is_wsi
+ self.tiles_folder_path = tiles_folder_path
+ self.orig_w = orig_w
+ self.orig_h = orig_h
+ self.orig_tile_w = orig_tile_w
+ self.orig_tile_h = orig_tile_h
+ self.scale_factor = scale_factor
+ self.scaled_w = scaled_w
+ self.scaled_h = scaled_h
+ self.scaled_tile_w = scaled_tile_w
+ self.scaled_tile_h = scaled_tile_h
+ self.tissue_percentage = tissue_percentage
+ self.num_col_tiles = num_col_tiles
+ self.num_row_tiles = num_row_tiles
+ self.tile_score_thresh = tile_score_thresh
+ self.level = level
+ self.best_level_for_downsample = best_level_for_downsample
+ self.real_scale_factor = real_scale_factor
+ self.tiles = []
+
+ def __str__(self):
+ return summary_title(self) + "\n" + summary_stats(self)
+
+ def mask_percentage(self):
+ """
+ Obtain the percentage of the slide that is masked.
+
+ Returns:
+ The amount of the slide that is masked as a percentage.
+ """
+ return 100 - self.tissue_percentage
+
+ def num_tiles(self):
+ """
+ Retrieve the total number of tiles.
+
+ Returns:
+ The total number of tiles (number of rows * number of columns).
+ """
+ return self.num_row_tiles * self.num_col_tiles
+
+ def tiles_by_tissue_percentage(self):
+ """
+ Retrieve the tiles ranked by tissue percentage.
+
+ Returns:
+ List of the tiles ranked by tissue percentage.
+ """
+ sorted_list = sorted(self.tiles, key=lambda t: t.tissue_percentage, reverse=True)
+ return sorted_list
+
+ def tiles_by_score(self):
+ """
+ Retrieve the tiles ranked by score.
+
+ Returns:
+ List of the tiles ranked by score.
+ """
+ sorted_list = sorted(self.tiles, key=lambda t: t.score, reverse=True)
+ return sorted_list
+
+ def get_tile(self, row, col):
+ """
+ Retrieve tile by row and column.
+
+ Args:
+ row: The row
+ col: The column
+
+ Returns:
+ Corresponding Tile object.
+ """
+ tile_index = (row - 1) * self.num_col_tiles + (col - 1)
+ tile = self.tiles[tile_index]
+ return tile
+
+ def top_tiles(self):
+ """
+ Retrieve only the tiles that pass scoring.
+
+ Returns:
+ List of the top-scoring tiles.
+ """
+ sorted_tiles = self.tiles_by_score()
+ top_tiles = [tile for tile in sorted_tiles
+ if self.check_tile(tile)]
+ print(f'{self.wsi_path}: Number of tiles that will be saved/all possible tiles: {len(top_tiles)}/{len(sorted_tiles)}')
+ return top_tiles
+
+ def check_tile(self, tile):
+ width = tile.o_c_e - tile.o_c_s
+ height = tile.o_r_e - tile.o_r_s
+ return tile.score > self.tile_score_thresh and width >= 0.7*self.orig_tile_w and height >= 0.7*self.orig_tile_h
+
+
+class Tile:
+ """
+ Class for information about a tile.
+ """
+ tile_summary = None
+ wsi_path = None
+ is_wsi = None
+ tiles_folder_path = None
+ np_scaled_tile = None
+ tile_num = None
+ r = None
+ c = None
+ r_s = None
+ r_e = None
+ c_s = None
+ c_e = None
+ o_r_s = None
+ o_r_e = None
+ o_c_s = None
+ o_c_e = None
+ t_p = None
+ color_factor = None
+ s_and_v_factor = None
+ quantity_factor = None
+ score = None
+ tile_naming_func = None
+ level = None
+ best_level_for_downsample = None
+ real_scale_factor = None
+
+ def __init__(self,
+ tile_summary,
+ wsi_path,
+ is_wsi,
+ tiles_folder_path,
+ np_scaled_tile,
+ tile_num,
+ r,
+ c,
+ r_s,
+ r_e,
+ c_s,
+ c_e,
+ o_r_s,
+ o_r_e,
+ o_c_s,
+ o_c_e,
+ t_p,
+ color_factor,
+ s_and_v_factor,
+ quantity_factor,
+ score,
+ tile_naming_func,
+ level,
+ best_level_for_downsample,
+ real_scale_factor):
+ self.tile_summary = tile_summary
+ self.wsi_path = wsi_path
+ self.is_wsi = is_wsi
+ self.tiles_folder_path = tiles_folder_path
+ self.np_scaled_tile = np_scaled_tile
+ self.tile_num = tile_num
+ self.r = r
+ self.c = c
+ self.r_s = r_s
+ self.r_e = r_e
+ self.c_s = c_s
+ self.c_e = c_e
+ self.o_r_s = o_r_s
+ self.o_r_e = o_r_e
+ self.o_c_s = o_c_s
+ self.o_c_e = o_c_e
+ self.tissue_percentage = t_p
+ self.color_factor = color_factor
+ self.s_and_v_factor = s_and_v_factor
+ self.quantity_factor = quantity_factor
+ self.score = score
+ self.tile_naming_func = tile_naming_func
+ self.level = level
+ self.best_level_for_downsample = best_level_for_downsample
+ self.real_scale_factor = real_scale_factor
+
+ def __str__(self):
+ return "[Tile #%d, Row #%d, Column #%d, Tissue %4.2f%%, Score %0.4f]" % (
+ self.tile_num, self.r, self.c, self.tissue_percentage, self.score)
+
+ def __repr__(self):
+ return "\n" + self.__str__()
+
+ def mask_percentage(self):
+ return 100 - self.tissue_percentage
+
+ def tissue_quantity(self):
+ return tissue_quantity(self.tissue_percentage)
+
+ def get_pil_tile(self):
+ return tile_to_pil_tile(self, self.is_wsi)
+
+ def get_np_tile(self):
+ return tile_to_np_tile(self)
+
+ def save_tile(self):
+ save_display_tile(self, save=True, display=False, is_wsi=self.is_wsi)
+
+ def display_tile(self):
+ save_display_tile(self, save=False, display=True, is_wsi=self.is_wsi)
+
+ def display_with_histograms(self):
+ display_tile(self, rgb_histograms=True, hsv_histograms=True)
+
+ def get_np_scaled_tile(self):
+ return self.np_scaled_tile
+
+ def get_pil_scaled_tile(self):
+ return util.np_to_pil(self.np_scaled_tile)
+
+ def get_width(self):
+ return self.o_c_e - self.o_c_s
+
+ def get_height(self):
+ return self.o_r_e - self.o_r_s
+
+ def get_x(self):
+ """
+ upper left x coordinate
+ """
+ return self.o_c_s
+
+ def get_y(self):
+ """
+        upper left y coordinate
+ """
+ return self.o_r_s
+
+ def get_path(self)->pathlib.Path:
+ return pathlib.Path(get_tile_image_path(self))
+
+ def get_name(self)->str:
+ return pathlib.Path(get_tile_image_path(self)).name
+
+
+
+class TissueQuantity(Enum):
+ NONE = 0
+ LOW = 1
+ MEDIUM = 2
+ HIGH = 3
+
+
+
+############################# functions #########################################
+
+def show_np_with_bboxes(img:numpy.ndarray, bboxes:List[numpy.ndarray], figsize:tuple=(10,10)):
+ """
+ Plot an image with red bounding-box overlays.
+
+ Arguments:
+ img: img as numpy array
+ bboxes: List of bounding boxes where each bbox is a numpy array:
+ array([ x-upper-left, y-upper-left, width, height])
+ e.g. array([ 50., 211., 17., 19.])
+ figsize: size of the plotted matplotlib figure in inches
+ """
+ # Create figure and axes
+ fig,ax = plt.subplots(1,1,figsize=figsize)
+ # Display the image
+ ax.imshow(img)
+ # Create a Rectangle patch for each bbox
+ for b in bboxes:
+ rect = matplotlib.patches.Rectangle((b[0],b[1]),b[2],b[3],linewidth=1,edgecolor='r',facecolor='none')
+ # Add the patch to the Axes
+ ax.add_patch(rect)
+ plt.show()
+
+def show_wsi_with_marked_tiles(figsize:Tuple[int] = (10,10),
+ scale_factor:int = 32,
+ tilesummary:TileSummary=None,
+ wsi_path:pathlib.Path=None,
+ df_tiles:pandas.DataFrame=None,
+ level:int = 0):
+ """
+ Either provide a TileSummary object or wsi_path, df_tiles and level.
+
+ Loads a whole slide image, scales it down, converts it into a numpy array and shows it with a grid overlay for all tiles
+ that passed scoring to visualize which tiles e.g. "tiles.WsiOrROIToTilesMultithreaded" calculated as worthy to keep.
+ Arguments:
+ figsize: Size of the plotted matplotlib figure containing the image.
+ scale_factor: The larger, the faster this method works, but the plotted image has less resolution.
+ tilesummary: a TileSummary object of one wsi
+ wsi_path: Path to a whole-slide image
+ df_tiles: A pandas dataframe from e.g. "tiles.WsiOrROIToTilesMultithreaded" with spacial information about all tiles
+ level: The level that was specified in e.g. "tiles.WsiOrROIToTilesMultithreaded". 0 means highest magnification.
+ """
+ if tilesummary != None:
+ wsi_pil, large_w, large_h, new_w, new_h, best_level_for_downsample = wsi_to_scaled_pil_image(tilesummary.wsi_path,
+ scale_factor=tilesummary.scale_factor,
+ level=tilesummary.level)
+ wsi_np = util.pil_to_np_rgb(wsi_pil)
+ boxes =[]
+ for tile in tilesummary.top_tiles():
+ box = np.array([tile.get_x(), tile.get_y(), tile.get_width(), tile.get_height()])/scale_factor
+ boxes.append(box)
+ show_np_with_bboxes(wsi_np, boxes, figsize)
+
+ else:
+ wsi_pil, large_w, large_h, new_w, new_h, best_level_for_downsample = wsi_to_scaled_pil_image(wsi_path,
+ scale_factor=scale_factor,
+ level=level)
+ wsi_np = util.pil_to_np_rgb(wsi_pil)
+ boxes =[]
+ for index, row in df_tiles.iterrows():
+ if row['wsi_path'] == wsi_path:
+ box = np.array([row['x_upper_left'], row['y_upper_left'], row['pixels_width'], row['pixels_height']])/scale_factor
+ boxes.append(box)
+
+ show_np_with_bboxes(wsi_np, boxes, figsize)
+
+
+def scoring_function_1(tissue_percent, combined_factor):
+ """
+ Tile score favoring tissue with lots of cells (lots of hematoxylin stained tissue).
+
+ Args:
+ tissue_percent: percentage of the tile judged to be tissue (0-100).
+ combined_factor: product of color factor and saturation/value factor from score_tile().
+ """
+ return tissue_percent * combined_factor / 1000.0
+
+def scoring_function_2(tissue_percent, combined_factor):
+ """
+ Tile score for when you mostly care that there is any tissue in the tile:
+ tissue percentage dominates (squared) while the color factor is log-damped.
+
+ Args:
+ tissue_percent: percentage of the tile judged to be tissue (0-100).
+ combined_factor: product of color factor and saturation/value factor from score_tile().
+ """
+ return (tissue_percent ** 2) * np.log(1 + combined_factor) / 1000.0
+
+
+def ExtractTileFromWSI(path:Union[str, pathlib.Path], x:int, y:int, width:int, height:int, level:int)-> PIL.Image:
+ """
+ Args:
+ path: path to wsi
+ x: x-coordinate of the upper left pixel. The method assumes, that you know the dimensions of your specified level.
+ y: y-coordinate of the upper left pixel. The method assumes, that you know the dimensions of your specified level.
+ width: tile width
+ height: tile height
+ level: Level of the WSI you want to extract the tile from. 0 means highest resolution.
+
+ Return:
+ tile as PIL.Image as RGB
+ """
+ s = slide.open_slide(str(path))
+ tile_region = s.read_region((x, y), level, (width, height))
+ # RGBA to RGB
+ pil_img = tile_region.convert("RGB")
+ return pil_img
+
+def ExtractTileFromPILImage(path:Union[str, pathlib.Path], x:int, y:int, width:int, height:int)-> PIL.Image:
+ """
+ Args:
+ path: path to PIL Image
+ x: x-coordinate of the upper left pixel
+ y: y-coordinate of the upper left pixel
+ width: tile width
+ height: tile height
+
+ Return:
+ tile as PIL.Image as RGB
+ """
+ pil_img = PIL.Image.open(path)
+ pil_img = pil_img.crop((x, y, x+width, y+height))
+ return pil_img
+
+def get_roi_name_from_path_pituitary_adenoma_entities(roi_path):
+ path = Path(roi_path)
+ split = path.stem.split('-')
+ if split[2] == 'HE':
+ return f'{split[0]}-{split[1]}-{split[2]}-{split[3]}-{split[4]}'
+ else:
+ return f'{split[0]}-{split[1]}-{split[2]}-{split[3]}-{split[4]}-{split[5]}'
+
+def get_wsi_name_from_path_pituitary_adenoma_entities(wsi_path):
+ path = Path(wsi_path)
+ split = path.stem.split('-')
+ return f'{split[0]}-{split[1]}-{split[2]}-{split[3]}'
+
+
+def WsiOrROIToTiles(wsiPath:pathlib.Path,
+ tilesFolderPath:pathlib.Path,
+ tile_height:int,
+ tile_width:int,
+ tile_naming_func:Callable,
+ tile_score_thresh:float = 0.55,
+ tile_scoring_function = scoring_function_1,
+ is_wsi:bool = True,
+ level = 0,
+ save_tiles:bool = False,
+ return_as_tilesummary_object = False)-> Union[TileSummary, pandas.DataFrame]:
+ """
+ There is currently a bug with levels above 0. Tiles do not get scored correctly and empty tiles will pass scoring.
+
+ Calculates tile coordinates and returns them in a pandas dataframe. If save_tiles == True the tiles will also be extracted
+ and saved from the WSI or ROI (ROI is assumed to be a "normal" image format like .png).
+
+ Arguments:
+ wsiPath: Path to a WSI or ROI
+ tilesFolderPath: The folder where the extracted tiles will be saved (only needed if save_tiles=True).
+ tileHeigth: Number of pixels tile height.
+ tileWidth: Number of pixels tile width.
+ tile_score_thresh: Tiles with a score higher than the number from "tileScoringFunction" will be saved.
+ tileScoringFunction: Function to score one tile to determine if it should be saved or not.
+ is_wsi: if true, a WSI format like .ndpi is assumed, if false, a format like png is assumed (ROI)
+ tile_naming_func: A function, that takes a pathlib.Path to the WSI or ROI as an argument and returns a string.
+ This string will then be used as part of the name for the tile (plus some specific tile information and
+ the file format .png, whick is generated by this library).
+ level: Level of the WSI you want to extract the tile from. 0 means highest resolution.
+ save_tiles: if True the tiles will be extracted and saved to {tilesFolderPath}
+ return_as_tilesummary_object: return_as_tilesummary_object: Set this to true, if you
+ want the TileSummary object and not a pandas dataframe.
+ Return:
+ if return_as_tilesummary_object == True:
+ a TileSummary object will be returned
+ else:
+ pandas dataframe with coloumns: ['tile_name','wsi_path','level','x_upper_left','y_upper_left','pixels_width','pixels_height']
+ """
+ if(not is_wsi and level != 0):
+ raise ValueError("Specifiying a level only makes sense when extracting tiles from WSIs. Just leave the default value.")
+ if(tilesFolderPath is None and save_tiles == True):
+ raise ValueError("You should specify a {tilesFolderPath}")
+
+ print(f"Starting to process {str(wsiPath)}")
+ if(is_wsi):
+ scale_factor = 32
+ else:
+ scale_factor = 1
+ ### against DecompressionBombWarning
+ #mage.MAX_IMAGE_PIXELS = 10000000000000
+ openslide.lowlevel._load_image = openslide_overwrite._load_image
+ if(is_wsi):
+ img_pil, original_width, original_height, scaled_width, scaled_height, best_level_for_downsample = wsi_to_scaled_pil_image(wsiPath, scale_factor, level)
+ else:
+ img_pil = Image.open(wsiPath)
+ original_width = scaled_width = img_pil.width
+ original_height = scaled_height = img_pil.height
+ best_level_for_downsample = 0
+
+ img_pil_filtered = filter.filter_img(img_pil)
+ tilesummary = create_tilesummary(wsiPath,
+ is_wsi,
+ tilesFolderPath,
+ img_pil,
+ img_pil_filtered,
+ original_width,
+ original_height,
+ scaled_width,
+ scaled_height,
+ tile_height,
+ tile_width,
+ scale_factor,
+ tile_score_thresh,
+ tile_scoring_function,
+ tile_naming_func,
+ level,
+ best_level_for_downsample)
+
+ if(save_tiles):
+ for tile in tilesummary.top_tiles():
+ tile.save_tile()
+
+ if return_as_tilesummary_object:
+ return tilesummary
+
+ else:
+ rows_list = []
+ for tile in tilesummary.top_tiles():
+ row = {'tile_name':tile.get_name(),
+ 'wsi_path':tile.wsi_path,
+ 'level':tile.level,
+ 'x_upper_left':tile.get_x(),
+ 'y_upper_left':tile.get_y(),
+ 'pixels_width':tile.get_width(),
+ 'pixels_height':tile.get_height()}
+ rows_list.append(row)
+
+ if(len(rows_list) == 0):
+ return pd.DataFrame(columns=['tile_name','wsi_path', \
+ 'level','x_upper_left','y_upper_left','pixels_width','pixels_height'])
+ else:
+ return pd.DataFrame(rows_list).set_index('tile_name', inplace=False)
+
+
+def WsiOrROIToTilesMultithreaded(wsiPaths:List[pathlib.Path],
+ tilesFolderPath:pathlib.Path,
+ tileHeight:int,
+ tileWidth:int,
+ tile_naming_func:Callable,
+ tile_score_thresh:float = 0.55,
+ tileScoringFunction = scoring_function_1,
+ is_wsi = True,
+ level = 0,
+ save_tiles:bool = False,
+ return_as_tilesummary_object = False)-> Union[List[TileSummary], pandas.DataFrame]:
+ """
+ The method WsiOrROIToTiles for a list of WSIs/ROIs in parallel on multiple threads.
+
+ Arguments:
+ wsiPaths: A list of paths to the WSIs or ROIs
+ tilesFolderPath: The folder where the extracted tiles will be saved (only needed if save_tiles=True).
+ tileHeigth: Number of pixels tile height.
+ tileWidth: Number of pixels tile width.
+ tile_score_thresh: Tiles with a score higher than the number from "tileScoringFunction" will be saved.
+ tileScoringFunction: Function to score one tile to determine if it should be saved or not.
+ is_wsi: if true, a WSI format like .ndpi is assumed, if false, a format like png is assumed (ROI)
+ tile_naming_func: A function, that takes a pathlib.Path to the WSI or ROI as an argument and returns a string.
+ This string will then be used as part of the name for the tile (plus some specific tile information and
+ the file format .png, whick is generated by this library).
+ level: Level of the WSI you want to extract the tile from. 0 means highest resolution.
+ save_tiles: if True the tiles will be extracted and saved to {tilesFolderPath}
+ return_as_tilesummary_object: Set this to true, if you want the TileSummary object and not a pandas dataframe.
+ Return:
+ if return_as_tilesummary_object == True:
+ a List of TileSummary objects will be returned
+ else:
+ pandas dataframe with coloumns: ['tile_name','wsi_path','level','x_upper_left','y_upper_left','pixels_width','pixels_height']
+ """
+
+ pbar = tqdm(total=len(wsiPaths))
+ results = []
+ def update(res):
+ results.append(res)
+ pbar.update()
+
+ with multiprocessing.Pool() as pool:
+ for p in wsiPaths:
+ pool.apply_async(WsiOrROIToTiles,
+ args=(p,
+ tilesFolderPath,
+ tileHeight,
+ tileWidth,
+ tile_naming_func,
+ tile_score_thresh,
+ tileScoringFunction,
+ is_wsi,
+ level,
+ save_tiles,
+ return_as_tilesummary_object),
+ callback=update)
+
+
+ pool.close()
+ pool.join()
+
+ if return_as_tilesummary_object:
+ return results
+ else:
+ merged_df = None
+ for res in tqdm(results):
+ if merged_df is None:
+ merged_df = res
+ else:
+ merged_df = merged_df.append(res, sort=False)
+
+ return merged_df.drop_duplicates(inplace=False)
+
+
+def wsi_to_scaled_pil_image(wsi_filepath:pathlib.Path, scale_factor = 32, level = 0):
+ """
+ Convert a WSI training slide to a PIL image.
+
+ Args:
+
+ Returns:
+
+ """
+ #wsi = openslide.open_slide(str(wsi_filepath))
+ #large_w, large_h = wsi.dimensions
+ #new_w = math.floor(large_w / scale_factor)
+ #new_h = math.floor(large_h / scale_factor)
+ #level = wsi.get_best_level_for_downsample(scale_factor)
+ #img = wsi.read_region((0, 0), level, wsi.level_dimensions[level])
+ #img = img.convert("RGB")
+ #if(scale_factor > 1):
+ # img = img.resize((new_w, new_h), PIL.Image.BILINEAR)
+ #return img, large_w, large_h, new_w, new_h
+
+ wsi = openslide.open_slide(str(wsi_filepath))
+ large_w, large_h = wsi.level_dimensions[level]
+ best_level_for_downsample = wsi.get_best_level_for_downsample(scale_factor)
+ new_w, new_h = wsi.level_dimensions[best_level_for_downsample]
+ img = wsi.read_region((0, 0), best_level_for_downsample, wsi.level_dimensions[best_level_for_downsample])
+ img = img.convert("RGB")
+ return img, large_w, large_h, new_w, new_h, best_level_for_downsample
+
+
+
+def create_tilesummary(wsiPath,
+ is_wsi,
+ tilesFolderPath,
+ img_pil:PIL.Image.Image,
+ img_pil_filtered:PIL.Image.Image,
+ wsi_original_width:int,
+ wsi_original_height:int,
+ wsi_scaled_width:int,
+ wsi_scaled_height:int,
+ tile_height:int,
+ tile_width:int,
+ scale_factor:int,
+ tile_score_thresh:float,
+ tile_scoring_function,
+ tile_naming_func,
+ level:int,
+ best_level_for_downsample:int = 0)->TileSummary:
+ """
+
+ Args:
+
+ """
+ np_img = util.pil_to_np_rgb(img_pil)
+ np_img_filtered = util.pil_to_np_rgb(img_pil_filtered)
+
+ tile_sum = score_tiles(np_img,
+ np_img_filtered,
+ wsiPath,
+ is_wsi,
+ tilesFolderPath,
+ tile_height,
+ tile_width,
+ scale_factor,
+ wsi_original_width,
+ wsi_original_height,
+ wsi_scaled_width,
+ wsi_scaled_height,
+ tile_score_thresh,
+ tile_scoring_function,
+ tile_naming_func,
+ level,
+ best_level_for_downsample)
+
+ return tile_sum
+
+
+def get_num_tiles(rows, cols, row_tile_size, col_tile_size):
+ """
+ Obtain the number of vertical and horizontal tiles that an image can be divided into given a row tile size and
+ a column tile size.
+
+ Args:
+ rows: Number of rows.
+ cols: Number of columns.
+ row_tile_size: Number of pixels in a tile row.
+ col_tile_size: Number of pixels in a tile column.
+
+ Returns:
+ Tuple consisting of the number of vertical tiles and the number of horizontal tiles that the image can be divided
+ into given the row tile size and the column tile size.
+ """
+ num_row_tiles = math.ceil(rows / row_tile_size)
+ num_col_tiles = math.ceil(cols / col_tile_size)
+ return num_row_tiles, num_col_tiles
+
+
+def get_tile_indices(rows, cols, row_tile_size, col_tile_size):
+ """
+ Obtain a list of tile coordinates (starting row, ending row, starting column, ending column, row number, column number).
+
+ Args:
+ rows: Number of rows.
+ cols: Number of columns.
+ row_tile_size: Number of pixels in a tile row.
+ col_tile_size: Number of pixels in a tile column.
+
+ Returns:
+ List of tuples representing tile coordinates consisting of starting row, ending row,
+ starting column, ending column, row number, column number.
+ """
+ indices = list()
+ num_row_tiles, num_col_tiles = get_num_tiles(rows, cols, row_tile_size, col_tile_size)
+ for r in range(0, num_row_tiles):
+ start_r = r * row_tile_size
+ end_r = ((r + 1) * row_tile_size) if (r < num_row_tiles - 1) else rows
+ for c in range(0, num_col_tiles):
+ start_c = c * col_tile_size
+ end_c = ((c + 1) * col_tile_size) if (c < num_col_tiles - 1) else cols
+ indices.append((start_r, end_r, start_c, end_c, r + 1, c + 1))
+ return indices
+
+
+
+def tile_to_pil_tile(tile:Tile, is_wsi:bool):
+ """
+ Convert tile information into the corresponding tile as a PIL image read from the whole-slide image file.
+
+ Args:
+ tile: Tile object.
+ is_wsi: if true, a WSI format like .ndpi is assumed, if false, a format like png is assumed (ROI)
+
+ Return:
+ Tile as a PIL image.
+ """
+ #x, y = tile.o_c_s, tile.o_r_s
+ #width, height = tile.o_c_e - tile.o_c_s, tile.o_r_e - tile.o_r_s
+ x = tile.get_x()
+ y = tile.get_y()
+ width = tile.get_width()
+ height = tile.get_height()
+ if(is_wsi):
+ pil_img = ExtractTileFromWSI(tile.wsi_path, x, y, width, height, tile.level)
+ else:
+ pil_img = ExtractTileFromPILImage(tile.wsi_path, x, y, width, height)
+ return pil_img
+
+
+def tile_to_np_tile(tile, is_wsi:bool):
+ """
+ Convert tile information into the corresponding tile as a NumPy image read from the whole-slide image file.
+
+ Args:
+ tile: Tile object.
+ is_wsi: if true, a WSI format like .ndpi is assumed, if false, a format like png is assumed (ROI)
+
+ Return:
+ Tile as a NumPy image.
+ """
+ pil_img = tile_to_pil_tile(tile, is_wsi)
+ np_img = util.pil_to_np_rgb(pil_img)
+ return np_img
+
+
+
+def get_tile_image_path(tile:Tile):
+ """
+ Obtain tile image path based on tile information such as row, column, row pixel position, column pixel position,
+ pixel width, and pixel height.
+
+ Args:
+ tile: Tile object.
+
+ Returns:
+ Path to image tile.
+ """
+ t = tile
+ if tile.tiles_folder_path is None:
+ return os.path.join(tile.tile_naming_func(tile.wsi_path) + "-" + 'tile' + "-r%d-c%d-x%d-y%d-w%d-h%d" % (
+ t.r, t.c, t.o_c_s, t.o_r_s, t.o_c_e - t.o_c_s, t.o_r_e - t.o_r_s) + "." + 'png')
+ else:
+ return os.path.join(tile.tiles_folder_path,
+ tile.tile_naming_func(tile.wsi_path) + "-" + 'tile' + "-r%d-c%d-x%d-y%d-w%d-h%d" % (
+ t.r, t.c, t.o_c_s, t.o_r_s, t.o_c_e - t.o_c_s, t.o_r_e - t.o_r_s) + "." + 'png')
+
+
+
+def save_display_tile(tile, save, display, is_wsi:bool):
+ """
+ Save and/or display a tile image.
+
+ Args:
+ tile: Tile object.
+ save: If True, save tile image.
+ display: If True, dispaly tile image.
+ """
+ tile_pil_img = tile_to_pil_tile(tile, is_wsi)
+
+ if save:
+ t = Time()
+ img_path = get_tile_image_path(tile)
+ dir = os.path.dirname(img_path)
+ if not os.path.exists(dir):
+ os.makedirs(dir)
+ tile_pil_img.save(img_path)
+ #print("%-20s | Time: %-14s Name: %s" % ("Save Tile", str(t.elapsed()), img_path))
+
+ if display:
+ tile_pil_img.show()
+
+
+
+def score_tiles(img_np:np.array,
+ img_np_filtered:np.array,
+ wsi_path:pathlib.Path,
+ is_wsi:bool,
+ tilesFolderPath:pathlib.Path,
+ tile_height:int,
+ tile_width:int,
+ scale_factor:int,
+ wsi_original_width:int,
+ wsi_original_height:int,
+ wsi_scaled_width:int,
+ wsi_scaled_height:int,
+ tile_score_thresh:float,
+ tile_scoring_function,
+ tile_naming_func,
+ level:int,
+ best_level_for_downsample:int) -> TileSummary:
+ """
+ Score all tiles for a slide and return the results in a TileSummary object.
+
+ Args:
+
+ Returns:
+ TileSummary object which includes a list of Tile objects containing information about each tile.
+ """
+ #img_path = slide.get_filter_image_result(slide_num)
+ #o_w, o_h, w, h = slide.parse_dimensions_from_image_filename(img_path)
+ #np_img = slide.open_image_np(img_path)
+
+ #tile_height_scaled = round(tile_height / scale_factor) # use round?
+ #tile_width_scaled = round(tile_width / scale_factor) # use round?
+
+ real_scale_factor = int(math.pow(2,best_level_for_downsample-level))
+ tile_height_scaled = round(tile_height / real_scale_factor) # use round?
+ tile_width_scaled = round(tile_width / real_scale_factor) # use round?
+
+ num_row_tiles, num_col_tiles = get_num_tiles(wsi_scaled_height,
+ wsi_scaled_width,
+ tile_height_scaled,
+ tile_width_scaled)
+
+ tile_sum = TileSummary(wsi_path=wsi_path,
+ is_wsi=is_wsi,
+ tiles_folder_path=tilesFolderPath,
+ orig_w=wsi_original_width,
+ orig_h=wsi_original_height,
+ orig_tile_w=tile_width,
+ orig_tile_h=tile_height,
+ scale_factor=scale_factor,
+ scaled_w=wsi_scaled_width,
+ scaled_h=wsi_scaled_height,
+ scaled_tile_w=tile_width_scaled,
+ scaled_tile_h=tile_height_scaled,
+ tissue_percentage=filter.tissue_percent(img_np_filtered),
+ num_col_tiles=num_col_tiles,
+ num_row_tiles=num_row_tiles,
+ tile_score_thresh=tile_score_thresh,
+ level=level,
+ best_level_for_downsample=best_level_for_downsample,
+ real_scale_factor=real_scale_factor)
+
+
+ count = 0
+ high = 0
+ medium = 0
+ low = 0
+ none = 0
+ tile_indices = get_tile_indices(wsi_scaled_height, wsi_scaled_width, tile_height_scaled, tile_width_scaled)
+ for t in tile_indices:
+ count += 1 # tile_num
+ r_s, r_e, c_s, c_e, r, c = t
+ np_tile = img_np_filtered[r_s:r_e, c_s:c_e]
+ t_p = filter.tissue_percent(np_tile)
+ amount = tissue_quantity(t_p)
+ if amount == TissueQuantity.HIGH:
+ high += 1
+ elif amount == TissueQuantity.MEDIUM:
+ medium += 1
+ elif amount == TissueQuantity.LOW:
+ low += 1
+ elif amount == TissueQuantity.NONE:
+ none += 1
+
+ o_c_s, o_r_s = slide.small_to_large_mapping((c_s, r_s), (wsi_original_width, wsi_original_height), real_scale_factor)
+ #print("o_c_s: " + str(o_c_s))
+ #print("o_r_s: " + str(o_r_s))
+ o_c_e, o_r_e = slide.small_to_large_mapping((c_e, r_e), (wsi_original_width, wsi_original_height), real_scale_factor)
+ #print("o_c_e: " + str(o_c_e))
+ #print("o_r_e: " + str(o_r_e))
+
+ # pixel adjustment in case tile dimension too large (for example, 1025 instead of 1024)
+ if (o_c_e - o_c_s) > tile_width:
+ o_c_e -= 1
+ if (o_r_e - o_r_s) > tile_height:
+ o_r_e -= 1
+
+ score, color_factor, s_and_v_factor, quantity_factor = score_tile(np_tile, t_p, r, c, tile_scoring_function)
+
+ np_tile #if small_tile_in_tile else None
+
+ tile = Tile(tile_sum, wsi_path, is_wsi, tilesFolderPath, np_tile, count, r, c, r_s, r_e, c_s, c_e, o_r_s, o_r_e, o_c_s,
+ o_c_e, t_p, color_factor, s_and_v_factor, quantity_factor, score, tile_naming_func, level,
+ best_level_for_downsample, real_scale_factor)
+ tile_sum.tiles.append(tile)
+
+ tile_sum.count = count
+ tile_sum.high = high
+ tile_sum.medium = medium
+ tile_sum.low = low
+ tile_sum.none = none
+
+ tiles_by_score = tile_sum.tiles_by_score()
+ rank = 0
+ for t in tiles_by_score:
+ rank += 1
+ t.rank = rank
+
+ return tile_sum
+
+
+
+def score_tile(np_tile, tissue_percent, row, col, scoring_function):
+ """
+ Score tile based on tissue percentage, color factor, saturation/value factor, and tissue quantity factor.
+
+ Args:
+ np_tile: Tile as NumPy array.
+ tissue_percent: The percentage of the tile judged to be tissue.
+ row: Tile row.
+ col: Tile column.
+ scoring_function: Function combining tissue_percent and the combined color factor into a raw score.
+
+ Returns tuple consisting of score, color factor, saturation/value factor, and tissue quantity factor.
+ """
+ color_factor = hsv_purple_pink_factor(np_tile)
+ s_and_v_factor = hsv_saturation_and_value_factor(np_tile)
+ amount = tissue_quantity(tissue_percent)
+ quantity_factor = tissue_quantity_factor(amount)
+ combined_factor = color_factor * s_and_v_factor
+ score = scoring_function(tissue_percent, combined_factor)
+
+ #if combined_factor != 0.0 or tissue_percent != 0.0:
+ # print(f'before: {score}')
+
+ # scale score to between 0 and 1
+ score = 1.0 - (10.0 / (10.0 + score))
+
+ #if combined_factor != 0.0 or tissue_percent != 0.0:
+ # print(f'after: {score}')
+
+ return score, color_factor, s_and_v_factor, quantity_factor
+
+def tissue_quantity_factor(amount):
+ """
+ Obtain a scoring factor based on the quantity of tissue in a tile.
+
+ Args:
+ amount: Tissue amount as a TissueQuantity enum value.
+
+ Returns:
+ Scoring factor based on the tile tissue quantity.
+ """
+ if amount == TissueQuantity.HIGH:
+ quantity_factor = 1.0
+ elif amount == TissueQuantity.MEDIUM:
+ quantity_factor = 0.2
+ elif amount == TissueQuantity.LOW:
+ quantity_factor = 0.1
+ else:
+ quantity_factor = 0.0
+ return quantity_factor
+
+
+def tissue_quantity(tissue_percentage):
+ """
+ Obtain TissueQuantity enum member (HIGH, MEDIUM, LOW, or NONE) for corresponding tissue percentage.
+
+ Args:
+ tissue_percentage: The tile tissue percentage.
+
+ Returns:
+ TissueQuantity enum member (HIGH, MEDIUM, LOW, or NONE).
+ """
+ if tissue_percentage >= TISSUE_HIGH_THRESH:
+ return TissueQuantity.HIGH
+ elif (tissue_percentage >= TISSUE_LOW_THRESH) and (tissue_percentage < TISSUE_HIGH_THRESH):
+ return TissueQuantity.MEDIUM
+ elif (tissue_percentage > 0) and (tissue_percentage < TISSUE_LOW_THRESH):
+ return TissueQuantity.LOW
+ else:
+ return TissueQuantity.NONE
+
+
+
+def rgb_to_hues(rgb):
+ """
+ Convert RGB NumPy array to 1-dimensional array of hue values (HSV H values in degrees).
+
+ Args:
+ rgb: RGB image as a NumPy array
+
+ Returns:
+ 1-dimensional array of hue values in degrees
+ """
+ hsv = filter.filter_rgb_to_hsv(rgb, display_np_info=False)
+ h = filter.filter_hsv_to_h(hsv, display_np_info=False)
+ return h
+
+
+def hsv_saturation_and_value_factor(rgb):
+ """
+ Function to reduce scores of tiles with narrow HSV saturations and values since saturation and value standard
+ deviations should be relatively broad if the tile contains significant tissue.
+
+ Example of a blurred tile that should not be ranked as a top tile:
+ ../data/tiles_png/006/TUPAC-TR-006-tile-r58-c3-x2048-y58369-w1024-h1024.png
+
+ Args:
+ rgb: RGB image as a NumPy array
+
+ Returns:
+ Saturation and value factor, where 1 is no effect and less than 1 means the standard deviations of saturation and
+ value are relatively small.
+ """
+ hsv = filter.filter_rgb_to_hsv(rgb, display_np_info=False)
+ s = filter.filter_hsv_to_s(hsv)
+ v = filter.filter_hsv_to_v(hsv)
+ s_std = np.std(s)
+ v_std = np.std(v)
+ if s_std < 0.05 and v_std < 0.05:
+ factor = 0.4
+ elif s_std < 0.05:
+ factor = 0.7
+ elif v_std < 0.05:
+ factor = 0.7
+ else:
+ factor = 1
+
+ factor = factor ** 2
+ return factor
+
+
+def hsv_purple_deviation(hsv_hues):
+ """
+ Obtain the deviation from the HSV hue for purple.
+
+ Args:
+ hsv_hues: NumPy array of HSV hue values.
+
+ Returns:
+ The HSV purple deviation.
+ """
+ purple_deviation = np.sqrt(np.mean(np.abs(hsv_hues - HSV_PURPLE) ** 2))
+ return purple_deviation
+
+
+def hsv_pink_deviation(hsv_hues):
+ """
+ Obtain the deviation from the HSV hue for pink.
+
+ Args:
+ hsv_hues: NumPy array of HSV hue values.
+
+ Returns:
+ The HSV pink deviation.
+ """
+ pink_deviation = np.sqrt(np.mean(np.abs(hsv_hues - HSV_PINK) ** 2))
+ return pink_deviation
+
+
+def hsv_purple_pink_factor(rgb):
+ """
+ Compute scoring factor based on purple and pink HSV hue deviations and degree to which a narrowed hue color range
+ average is purple versus pink.
+
+ Args:
+ rgb: Image as NumPy array.
+
+ Returns:
+ Factor that favors purple (hematoxylin stained) tissue over pink (eosin stained) tissue.
+ """
+ hues = rgb_to_hues(rgb)
+ hues = hues[hues >= 260] # exclude hues under 260
+ hues = hues[hues <= 340] # exclude hues over 340
+ if len(hues) == 0:
+ return 0 # if no hues between 260 and 340, then not purple or pink
+ pu_dev = hsv_purple_deviation(hues)
+ pi_dev = hsv_pink_deviation(hues)
+ # squared distance from the pink end of the range: the more purple the average, the larger the boost
+ avg_factor = (340 - np.average(hues)) ** 2
+
+ if pu_dev == 0: # avoid divide by zero if tile has no tissue
+ return 0
+
+ factor = pi_dev / pu_dev * avg_factor
+ return factor
+
+
+def hsv_purple_vs_pink_average_factor(rgb, tissue_percentage):
+ """
+ Function to favor purple (hematoxylin) over pink (eosin) staining based on the distance of the HSV hue average
+ from purple and pink.
+
+ Args:
+ rgb: Image as RGB NumPy array
+ tissue_percentage: Amount of tissue on the tile
+
+ Returns:
+ Factor, where >1 to boost purple slide scores, <1 to reduce pink slide scores, or 1 no effect.
+ """
+
+ factor = 1
+ # only applies to slides with a high quantity of tissue
+ if tissue_percentage < TISSUE_HIGH_THRESH:
+ return factor
+
+ hues = rgb_to_hues(rgb)
+ hues = hues[hues >= 200] # Remove hues under 200
+ if len(hues) == 0:
+ return factor
+ avg = np.average(hues)
+ # pil_hue_histogram(hues).show()
+
+ pu = HSV_PURPLE - avg
+ pi = HSV_PINK - avg
+ pupi = pu + pi
+ # print("Av: %4d, Pu: %4d, Pi: %4d, PuPi: %4d" % (avg, pu, pi, pupi))
+ # Av: 250, Pu: 20, Pi: 80, PuPi: 100
+ # Av: 260, Pu: 10, Pi: 70, PuPi: 80
+ # Av: 270, Pu: 0, Pi: 60, PuPi: 60 ** PURPLE
+ # Av: 280, Pu: -10, Pi: 50, PuPi: 40
+ # Av: 290, Pu: -20, Pi: 40, PuPi: 20
+ # Av: 300, Pu: -30, Pi: 30, PuPi: 0
+ # Av: 310, Pu: -40, Pi: 20, PuPi: -20
+ # Av: 320, Pu: -50, Pi: 10, PuPi: -40
+ # Av: 330, Pu: -60, Pi: 0, PuPi: -60 ** PINK
+ # Av: 340, Pu: -70, Pi: -10, PuPi: -80
+ # Av: 350, Pu: -80, Pi: -20, PuPi: -100
+
+ if pupi > 30:
+ factor *= 1.2
+ if pupi < -30:
+ factor *= .8
+ if pupi > 0:
+ factor *= 1.2
+ if pupi > 50:
+ factor *= 1.2
+ if pupi < -60:
+ factor *= .8
+
+ return factor
\ No newline at end of file
diff --git a/deephistopath/wsi/util.py b/wsi/util.py
old mode 100644
new mode 100755
similarity index 83%
rename from deephistopath/wsi/util.py
rename to wsi/util.py
index c2e3a9c..38ab6f5
--- a/deephistopath/wsi/util.py
+++ b/wsi/util.py
@@ -14,14 +14,43 @@
#
# ------------------------------------------------------------------------
+import pathlib
+from pathlib import Path
import datetime
import numpy as np
from PIL import Image, ImageDraw, ImageFont
+import matplotlib.pyplot as plt
+import matplotlib.image as mpimg
+import fastai
# If True, display additional NumPy array stats (min, max, mean, is_binary).
ADDITIONAL_NP_STATS = False
+
+def show_np(np):
+ """
+ Convert a NumPy image array to a PIL image (e.g. for notebook display).
+ """
+ # NOTE(review): `util` is not imported in this module (this IS util.py) and the
+ # parameter name `np` shadows the numpy import — presumably this should call
+ # np_to_pil directly; verify before relying on this helper.
+ return util.np_to_pil(np)
+
+def show_multiple_images(paths:list, rows = 3, figsize=(128, 64)):
+ """
+ Show several images from disk in one grid (via fastai.vision).
+
+ Args:
+ paths: A list of paths to images.
+ rows: number of rows in the displayed grid.
+ figsize: size of the matplotlib figure in inches.
+ """
+ imgs = [fastai.vision.open_image(p) for p in paths]
+ fastai.vision.show_all(imgs=imgs, r=rows, figsize=figsize)
+
+def show_multiple_images_big(paths:list, axis_off:bool = False):
+ """
+ Show several images from disk, each in its own full-size matplotlib figure.
+
+ Args:
+ paths: A list of paths to images.
+ axis_off: if True, hide the axes around each image.
+ """
+ for p in paths:
+ plt.imshow(mpimg.imread(str(p)))
+ if(axis_off):
+ plt.axis('off')
+ # one figure per image, shown immediately
+ plt.show()
+
+
def pil_to_np_rgb(pil_img):
"""
Convert a PIL Image to a NumPy array.
@@ -34,9 +63,9 @@ def pil_to_np_rgb(pil_img):
Returns:
The PIL image converted to a NumPy array.
"""
- t = Time()
+ #t = Time()
rgb = np.asarray(pil_img)
- np_info(rgb, "RGB", t.elapsed())
+ #np_info(rgb, "RGB", t.elapsed())
return rgb
@@ -124,9 +153,9 @@ def mask_rgb(rgb, mask):
Returns:
NumPy array representing an RGB image with mask applied.
"""
- t = Time()
+ #t = Time()
result = rgb * np.dstack([mask, mask, mask])
- np_info(result, "Mask RGB", t.elapsed())
+ #np_info(result, "Mask RGB", t.elapsed())
return result