From e1479281811e339a9499f77556122abed9d791c8 Mon Sep 17 00:00:00 2001
From: Chip Lynch <chip@chiplynch.com>
Date: Tue, 26 Jan 2021 15:38:47 -0800
Subject: [PATCH 01/19] Needless commit for git testing fork

---
 Makefile | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Makefile b/Makefile
index 112b1e68188a0..5ea35053a3b21 100644
--- a/Makefile
+++ b/Makefile
@@ -66,3 +66,8 @@ code-analysis:
 
 flake8-diff:
 	git diff upstream/main -u -- "*.py" | flake8 --diff
+
+install:
+	. ./venv38/bin/activate ; pip install --verbose --no-build-isolation --editable .
+install-test:
+	python -c "import sklearn; sklearn.show_versions()"

From cfae65e75dfa75d22938057b3d8a85cb5ea82a8d Mon Sep 17 00:00:00 2001
From: Chip Lynch <chip@chiplynch.com>
Date: Mon, 8 Feb 2021 15:56:56 -0800
Subject: [PATCH 02/19] Basic cython structure and some dummy code compiling

---
 doc/conf.py                          |   1 +
 sklearn/neighbors/__init__.py        |   2 +
 sklearn/neighbors/_helpers.pxi       |  81 ++++++++++++++++++++++
 sklearn/neighbors/_trilateration.pyx | 100 +++++++++++++++++++++++++++
 sklearn/neighbors/_unsupervised.py   |   1 +
 sklearn/neighbors/setup.py           |   6 ++
 6 files changed, 191 insertions(+)
 create mode 100644 sklearn/neighbors/_helpers.pxi
 create mode 100644 sklearn/neighbors/_trilateration.pyx

diff --git a/doc/conf.py b/doc/conf.py
index adf12d9e88e82..62013d8891073 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -25,6 +25,7 @@
 # is relative to the documentation root, use os.path.abspath to make it
 # absolute, like shown here.
 sys.path.insert(0, os.path.abspath('sphinxext'))
+sys.path.insert(0, os.path.abspath('../venv38/lib/python3.8/site-packages/'))
 
 from github_link import make_linkcode_resolve
 import sphinx_gallery
diff --git a/sklearn/neighbors/__init__.py b/sklearn/neighbors/__init__.py
index 82f9993bec50c..cb9d55c22498c 100644
--- a/sklearn/neighbors/__init__.py
+++ b/sklearn/neighbors/__init__.py
@@ -5,6 +5,7 @@
 
 from ._ball_tree import BallTree
 from ._kd_tree import KDTree
+from ._trilateration import TrilaterationIndex
 from ._dist_metrics import DistanceMetric
 from ._graph import kneighbors_graph, radius_neighbors_graph
 from ._graph import KNeighborsTransformer, RadiusNeighborsTransformer
@@ -20,6 +21,7 @@
 __all__ = ['BallTree',
            'DistanceMetric',
            'KDTree',
+           'TrilaterationIndex',
            'KNeighborsClassifier',
            'KNeighborsRegressor',
            'KNeighborsTransformer',
diff --git a/sklearn/neighbors/_helpers.pxi b/sklearn/neighbors/_helpers.pxi
new file mode 100644
index 0000000000000..130383aa1e81d
--- /dev/null
+++ b/sklearn/neighbors/_helpers.pxi
@@ -0,0 +1,81 @@
+# File is temporary, for moving around some dependencies
+
+cdef DTYPE_t[:, ::1] get_memview_DTYPE_2D(
+                               np.ndarray[DTYPE_t, ndim=2, mode='c'] X):
+    return <DTYPE_t[:X.shape[0], :X.shape[1]:1]> (<DTYPE_t*> X.data)
+
+cdef ITYPE_t[::1] get_memview_ITYPE_1D(
+                               np.ndarray[ITYPE_t, ndim=1, mode='c'] X):
+    return <ITYPE_t[:X.shape[0]:1]> (<ITYPE_t*> X.data)
+
+cdef inline void dual_swap(DTYPE_t* darr, ITYPE_t* iarr,
+                           ITYPE_t i1, ITYPE_t i2) nogil:
+    """swap the values at inex i1 and i2 of both darr and iarr"""
+    cdef DTYPE_t dtmp = darr[i1]
+    darr[i1] = darr[i2]
+    darr[i2] = dtmp
+
+    cdef ITYPE_t itmp = iarr[i1]
+    iarr[i1] = iarr[i2]
+    iarr[i2] = itmp
+
+cdef int _simultaneous_sort(DTYPE_t* dist, ITYPE_t* idx,
+                            ITYPE_t size) nogil except -1:
+    """
+    Perform a recursive quicksort on the dist array, simultaneously
+    performing the same swaps on the idx array.  The equivalent in
+    numpy (though quite a bit slower) is
+
+    def simultaneous_sort(dist, idx):
+        i = np.argsort(dist)
+        return dist[i], idx[i]
+    """
+    cdef ITYPE_t pivot_idx, i, store_idx
+    cdef DTYPE_t pivot_val
+
+    # in the small-array case, do things efficiently
+    if size <= 1:
+        pass
+    elif size == 2:
+        if dist[0] > dist[1]:
+            dual_swap(dist, idx, 0, 1)
+    elif size == 3:
+        if dist[0] > dist[1]:
+            dual_swap(dist, idx, 0, 1)
+        if dist[1] > dist[2]:
+            dual_swap(dist, idx, 1, 2)
+            if dist[0] > dist[1]:
+                dual_swap(dist, idx, 0, 1)
+    else:
+        # Determine the pivot using the median-of-three rule.
+        # The smallest of the three is moved to the beginning of the array,
+        # the middle (the pivot value) is moved to the end, and the largest
+        # is moved to the pivot index.
+        pivot_idx = size / 2
+        if dist[0] > dist[size - 1]:
+            dual_swap(dist, idx, 0, size - 1)
+        if dist[size - 1] > dist[pivot_idx]:
+            dual_swap(dist, idx, size - 1, pivot_idx)
+            if dist[0] > dist[size - 1]:
+                dual_swap(dist, idx, 0, size - 1)
+        pivot_val = dist[size - 1]
+
+        # partition indices about pivot.  At the end of this operation,
+        # pivot_idx will contain the pivot value, everything to the left
+        # will be smaller, and everything to the right will be larger.
+        store_idx = 0
+        for i in range(size - 1):
+            if dist[i] < pivot_val:
+                dual_swap(dist, idx, i, store_idx)
+                store_idx += 1
+        dual_swap(dist, idx, store_idx, size - 1)
+        pivot_idx = store_idx
+
+        # recursively sort each side of the pivot
+        if pivot_idx > 1:
+            _simultaneous_sort(dist, idx, pivot_idx)
+        if pivot_idx + 2 < size:
+            _simultaneous_sort(dist + pivot_idx + 1,
+                               idx + pivot_idx + 1,
+                               size - pivot_idx - 1)
+    return 0
diff --git a/sklearn/neighbors/_trilateration.pyx b/sklearn/neighbors/_trilateration.pyx
new file mode 100644
index 0000000000000..c3271ef657edb
--- /dev/null
+++ b/sklearn/neighbors/_trilateration.pyx
@@ -0,0 +1,100 @@
+#!python
+#cython: boundscheck=False
+#cython: wraparound=False
+#cython: cdivision=True
+
+# Trilateration (or n-fold lateration for arbitrary dimensions)
+# =====================
+#
+#    Author: Chip Lynch <cmlync02@louisville.edu>
+#    License: BSD something something
+#
+# The Trilateration Index itself is a only a data structure designed
+# to store distances from n (default 3 thus "tri") fixed points in a
+# sorted manner for quick approximate distance comparisons.  This is
+# most beneficial when the distance metric is complex (i.e. accurate
+# geographic distances on an oblate spheroid earth).
+#
+# Trilateration optimized nearest-neighbor and related algorithms will
+# create this structure during the "fit" phase, and leverage the anchored
+# distance data within neighbor searches.
+
+cimport numpy as np
+import numpy as np
+
+from ..utils import (
+    check_array
+)
+
+from ._typedefs cimport DTYPE_t, ITYPE_t, DITYPE_t
+from ._typedefs import DTYPE, ITYPE
+
+from ._dist_metrics cimport (DistanceMetric, euclidean_dist, euclidean_rdist,
+                             euclidean_dist_to_rdist, euclidean_rdist_to_dist)
+
+
+__all__ = ['TrilaterationIndex']
+
+DOC_DICT = {'TrilaterationIndex': 'TrilaterationIndex'}
+
+# Start simple:
+VALID_METRICS = ['EuclideanDistance']
+
+include "_helpers.pxi"
+
+cdef class TrilaterationIndex:
+    # __doc__ = CLASS_DOC.format(**DOC_DICT)
+    
+    cdef np.ndarray data_arr
+    cdef np.ndarray ref_points_arr
+    cdef np.ndarray idx_array_arr
+
+    def __cinit__(self):
+        self.data_arr = np.empty((1, 1), dtype=DTYPE, order='C')
+
+    def __init__(self, data, metric='euclidean', **kwargs):
+        self.data_arr = check_array(data, dtype=DTYPE, order='C')
+        n_samples = self.data_arr.shape[0]
+        n_features = self.data_arr.shape[1]
+
+        self.dist_metric = DistanceMetric.get_metric(metric, **kwargs)
+        self.euclidean = (self.dist_metric.__class__.__name__
+                          == 'EuclideanDistance')
+        metric = self.dist_metric.__class__.__name__
+        if metric not in VALID_METRICS:
+            raise ValueError('metric {metric} is not valid for '
+                             '{TrilaterationIndex}'.format(metric=metric,
+                                                   **DOC_DICT))
+        self.dist_metric._validate_data(self.data_arr)
+
+        # allocate arrays for storage
+        self.idx_array_arr = np.arange(n_samples, dtype=ITYPE)
+
+        self._update_memviews()
+
+        print(self.data_arr)
+
+    def _update_memviews(self):
+        self.data = get_memview_DTYPE_2D(self.data_arr)
+        self.idx_array = get_memview_ITYPE_1D(self.idx_array_arr)
+        self.ref_points = get_memview_DTYPE_2D(self.ref_points_arr)
+
+    def query(self, X, k=5,
+              return_distance=True,
+              sort_results=True):
+        """
+        query the index for k nearest neighbors
+        """
+
+        if X.shape[X.ndim - 1] != self.data.shape[1]:
+            raise ValueError("query data dimension must "
+                             "match training data dimension")
+        
+        if self.data.shape[0] < k:
+            raise ValueError("k must be less than or equal "
+                             "to the number of training points")
+        
+
+
+
+    pass
diff --git a/sklearn/neighbors/_unsupervised.py b/sklearn/neighbors/_unsupervised.py
index 822a30f503bd2..1b57da81dacb2 100644
--- a/sklearn/neighbors/_unsupervised.py
+++ b/sklearn/neighbors/_unsupervised.py
@@ -28,6 +28,7 @@ class NearestNeighbors(KNeighborsMixin,
 
         - 'ball_tree' will use :class:`BallTree`
         - 'kd_tree' will use :class:`KDTree`
+        - 'trilateration' will use :class:`TrilaterationIndex`
         - 'brute' will use a brute-force search.
         - 'auto' will attempt to decide the most appropriate algorithm
           based on the values passed to :meth:`fit` method.
diff --git a/sklearn/neighbors/setup.py b/sklearn/neighbors/setup.py
index 9264044678193..a8b6d98cf1ec4 100644
--- a/sklearn/neighbors/setup.py
+++ b/sklearn/neighbors/setup.py
@@ -20,6 +20,11 @@ def configuration(parent_package='', top_path=None):
                          include_dirs=[numpy.get_include()],
                          libraries=libraries)
 
+    config.add_extension('_trilateration',
+                         sources=['_trilateration.pyx'],
+                         include_dirs=[numpy.get_include()],
+                         libraries=libraries)
+
     config.add_extension('_dist_metrics',
                          sources=['_dist_metrics.pyx'],
                          include_dirs=[numpy.get_include(),
@@ -31,6 +36,7 @@ def configuration(parent_package='', top_path=None):
                          sources=['_typedefs.pyx'],
                          include_dirs=[numpy.get_include()],
                          libraries=libraries)
+
     config.add_extension("_quad_tree",
                          sources=["_quad_tree.pyx"],
                          include_dirs=[numpy.get_include()],

From 44053ae426480c2c18c9de2aa00c98db9bfe153e Mon Sep 17 00:00:00 2001
From: Chip Lynch <chip@chiplynch.com>
Date: Wed, 10 Feb 2021 16:57:38 -0800
Subject: [PATCH 03/19] TrilaterationIndex.query is working

---
 sklearn/neighbors/_helpers.pxi       |  13 +++
 sklearn/neighbors/_trilateration.pyx | 140 ++++++++++++++++++++++++++-
 2 files changed, 149 insertions(+), 4 deletions(-)

diff --git a/sklearn/neighbors/_helpers.pxi b/sklearn/neighbors/_helpers.pxi
index 130383aa1e81d..0e4f2efd632a9 100644
--- a/sklearn/neighbors/_helpers.pxi
+++ b/sklearn/neighbors/_helpers.pxi
@@ -1,5 +1,7 @@
 # File is temporary, for moving around some dependencies
 
+from libc.math cimport fabs
+
 cdef DTYPE_t[:, ::1] get_memview_DTYPE_2D(
                                np.ndarray[DTYPE_t, ndim=2, mode='c'] X):
     return <DTYPE_t[:X.shape[0], :X.shape[1]:1]> (<DTYPE_t*> X.data)
@@ -19,6 +21,17 @@ cdef inline void dual_swap(DTYPE_t* darr, ITYPE_t* iarr,
     iarr[i1] = iarr[i2]
     iarr[i2] = itmp
 
+cdef int _find_nearest_sorted_2D(DTYPE_t[:,:] rdists, DTYPE_t target):
+    """ rdists must be sorted by its first column.
+    Can we do this faster with Memoryviews?  See _simultaneous_sort
+    """
+    cdef int idx
+    idx = np.searchsorted(rdists[:,0], target, side="left")
+    if idx > 0 and (idx == len(rdists) or fabs(target - rdists[idx-1,0]) < fabs(target - rdists[idx,0])):
+        idx = idx - 1
+    return idx
+
+
 cdef int _simultaneous_sort(DTYPE_t* dist, ITYPE_t* idx,
                             ITYPE_t size) nogil except -1:
     """
diff --git a/sklearn/neighbors/_trilateration.pyx b/sklearn/neighbors/_trilateration.pyx
index c3271ef657edb..745955b117263 100644
--- a/sklearn/neighbors/_trilateration.pyx
+++ b/sklearn/neighbors/_trilateration.pyx
@@ -21,6 +21,9 @@
 
 cimport numpy as np
 import numpy as np
+from libc.math cimport fabs
+
+from scipy.spatial.distance import cdist
 
 from ..utils import (
     check_array
@@ -48,18 +51,32 @@ cdef class TrilaterationIndex:
     cdef np.ndarray data_arr
     cdef np.ndarray ref_points_arr
     cdef np.ndarray idx_array_arr
+    cdef np.ndarray r0sortorder
+    cdef np.ndarray ref_dists
+
+    cdef readonly DTYPE_t[:, ::1] data
+    cdef readonly DTYPE_t[:, ::1] ref_points
+    cdef public ITYPE_t[::1] idx_array
+    cdef readonly DTYPE_t[:, ::1] distances
+
+    cdef DistanceMetric dist_metric
 
     def __cinit__(self):
         self.data_arr = np.empty((1, 1), dtype=DTYPE, order='C')
 
-    def __init__(self, data, metric='euclidean', **kwargs):
+    def __init__(self, data, ref, metric='euclidean', **kwargs):
         self.data_arr = check_array(data, dtype=DTYPE, order='C')
+        self.ref_points_arr = check_array(ref, dtype=DTYPE, order='C')
+
+        if self.data_arr.shape[1] != self.ref_points_arr.shape[1]:
+            raise ValueError("Data and reference points must share same dimension")
+
         n_samples = self.data_arr.shape[0]
         n_features = self.data_arr.shape[1]
 
         self.dist_metric = DistanceMetric.get_metric(metric, **kwargs)
-        self.euclidean = (self.dist_metric.__class__.__name__
-                          == 'EuclideanDistance')
+        # self.euclidean = (self.dist_metric.__class__.__name__
+        #                   == 'EuclideanDistance')
         metric = self.dist_metric.__class__.__name__
         if metric not in VALID_METRICS:
             raise ValueError('metric {metric} is not valid for '
@@ -67,17 +84,34 @@ cdef class TrilaterationIndex:
                                                    **DOC_DICT))
         self.dist_metric._validate_data(self.data_arr)
 
+        self.ref_dists = self.dist_metric.pairwise(self.data_arr, self.ref_points_arr)
+
         # allocate arrays for storage
         self.idx_array_arr = np.arange(n_samples, dtype=ITYPE)
 
+
+        # Sort data by distance to refpoint[0]
+        # TODO: See _simultaneous sort to see if we can speed this up:
+        # (Right now since self.distances is 2D, we can't use this)
+        # _simultaneous_sort(&self.distances,
+        #                    &self.idx_array,
+        #                    self.distances.shape[0])
+
+        r0sortorder = np.argsort(self.ref_dists[:,0])
+        self.ref_dists = self.ref_dists[r0sortorder]
+        self.idx_array_arr = self.idx_array_arr[r0sortorder]
+
         self._update_memviews()
+        _find_nearest_sorted_2D(self.distances, -2)
+        _find_nearest_sorted_2D(self.distances, 62)
+        _find_nearest_sorted_2D(self.distances, 100)
 
-        print(self.data_arr)
 
     def _update_memviews(self):
         self.data = get_memview_DTYPE_2D(self.data_arr)
         self.idx_array = get_memview_ITYPE_1D(self.idx_array_arr)
         self.ref_points = get_memview_DTYPE_2D(self.ref_points_arr)
+        self.distances = get_memview_DTYPE_2D(self.ref_dists)
 
     def query(self, X, k=5,
               return_distance=True,
@@ -94,6 +128,104 @@ cdef class TrilaterationIndex:
             raise ValueError("k must be less than or equal "
                              "to the number of training points")
         
+        # Establish the distances from the query point to the reference points
+        cdef np.ndarray q_dists
+        q_dists = self.dist_metric.pairwise(X, self.ref_points_arr)
+
+        # cdef DTYPE_t[:, ::1] Xarr
+        # Xarr = get_memview_DTYPE_2D(np.asarray(X))
+        # Xarr = np.asarray(X, dtype=DTYPE, order='C')
+        cdef int best_idx, low_idx, low_idx_possible, high_idx, high_idx_possible
+        cdef int test_idx
+        cdef DTYPE_t best_dist, test_dist
+
+        best_idx = _find_nearest_sorted_2D(self.distances, q_dists[0,0])
+        # best_dist = self.dist_metric.dist(self.data[best_idx,:], &Xarr, X.shape[1])
+        # print(self.data_arr[best_idx,:].shape)
+        best_dist = cdist([self.data_arr[best_idx,:]], X)
+
+        print("first best guess:")
+        print(best_idx)
+        print(np.asarray(self.data[best_idx,:]))
+
+        print("starting best distance")
+        print(best_dist)
+
+        # Establish bounds between which to search
+        low_idx_possible = _find_nearest_sorted_2D(self.distances, q_dists[0, 0] - best_dist)
+        high_idx_possible = _find_nearest_sorted_2D(self.distances, q_dists[0, 0] + best_dist)
+        low_idx = max(best_idx - 1, 0)
+        high_idx = min(best_idx + 1, self.distances.shape[0])
+        print(f"low_idx_possible: {low_idx_possible}; high_idx_possible: {high_idx_possible}")
+
+        # Consider adding chunking here
+        # So we test more than one new point at a time
+        test_idx = best_idx
+
+        while True:
+            if low_idx <= low_idx_possible and high_idx >= high_idx_possible:
+                break
+
+            # Determine whether the next high or low point is a better test:
+            lowdelta = fabs(self.distances[low_idx, 0] - q_dists[0, 0])
+            highdelta = fabs(self.distances[high_idx, 0] - q_dists[0, 0])
+            print(f"comparing: {low_idx}, {high_idx}")
+            print(f"lowdelta: {lowdelta}, highdelta: {highdelta}")
+            if lowdelta <= highdelta and low_idx >= low_idx_possible:
+                test_idx = low_idx
+                low_idx = low_idx - 1
+            elif high_idx <= high_idx_possible:
+                test_idx = high_idx
+                high_idx = high_idx + 1
+            elif low_idx >= low_idx_possible:
+                test_idx = low_idx
+                low_idx = low_idx - 1
+            else:
+                print("why?")
+                break
+
+            print(f"testing point at: {self.idx_array[test_idx]} {np.asarray(self.data[self.idx_array[test_idx],:])}")
+            # Check that all pre-calculated distances are better than best
+            sufficient = True
+            for d, q in zip(self.distances[test_idx,1:], q_dists[0,1:]):
+                print(f"testing: ({abs(d-q)}) vs {best_dist}")
+                if abs(d-q) > best_dist:
+                    sufficient = False
+                    break
+            
+            if sufficient:
+                test_dist = cdist([self.data_arr[self.idx_array[test_idx],:]], X)
+                print(f"{test_idx} is sufficient... test_dist is: {test_dist}")
+                if test_dist < best_dist:
+                    print("replacing best with test...")
+                    print(f"new best idx: {best_idx} ({self.idx_array[best_idx]}) with dist: {test_dist}")
+                    best_idx = test_idx
+                    best_dist = test_dist
+                    low_idx_possible = _find_nearest_sorted_2D(self.distances, q_dists[0, 0] - best_dist)
+                    high_idx_possible = _find_nearest_sorted_2D(self.distances, q_dists[0, 0] + best_dist)
+
+            continue
+
+        return (self.idx_array[best_idx], best_dist)
+
+
+
+    def query_radius(self, X, r,
+                     int return_distance=False,
+                     int count_only=False,
+                     int sort_results=False):
+        """
+        query the index for neighbors within a radius r
+        """
+        if count_only and return_distance:
+            raise ValueError("count_only and return_distance "
+                             "cannot both be true")
+
+        if sort_results and not return_distance:
+            raise ValueError("return_distance must be True "
+                             "if sort_results is True")
+        
+
 
 
 

From 4fc8bf6388ced4b73f67b94c4f650004aace823a Mon Sep 17 00:00:00 2001
From: Chip Lynch <chip@chiplynch.com>
Date: Thu, 11 Feb 2021 13:03:10 -0800
Subject: [PATCH 04/19] Fixed big bug

---
 sklearn/neighbors/_helpers.pxi       | 116 +++++++++++++++++++++++++++
 sklearn/neighbors/_trilateration.pyx |  71 +++++++++++++---
 trilat_test_10.py                    |  83 +++++++++++++++++++
 3 files changed, 257 insertions(+), 13 deletions(-)
 create mode 100644 trilat_test_10.py

diff --git a/sklearn/neighbors/_helpers.pxi b/sklearn/neighbors/_helpers.pxi
index 0e4f2efd632a9..2306f573a9afd 100644
--- a/sklearn/neighbors/_helpers.pxi
+++ b/sklearn/neighbors/_helpers.pxi
@@ -10,6 +10,10 @@ cdef ITYPE_t[::1] get_memview_ITYPE_1D(
                                np.ndarray[ITYPE_t, ndim=1, mode='c'] X):
     return <ITYPE_t[:X.shape[0]:1]> (<ITYPE_t*> X.data)
 
+cdef ITYPE_t[:, ::1] get_memview_ITYPE_2D(
+                               np.ndarray[ITYPE_t, ndim=2, mode='c'] X):
+    return <ITYPE_t[:X.shape[0], :X.shape[1]:1]> (<ITYPE_t*> X.data)
+
 cdef inline void dual_swap(DTYPE_t* darr, ITYPE_t* iarr,
                            ITYPE_t i1, ITYPE_t i2) nogil:
     """swap the values at inex i1 and i2 of both darr and iarr"""
@@ -32,6 +36,118 @@ cdef int _find_nearest_sorted_2D(DTYPE_t[:,:] rdists, DTYPE_t target):
     return idx
 
 
+cdef class NeighborsHeap:
+    """A max-heap structure to keep track of distances/indices of neighbors
+
+    This implements an efficient pre-allocated set of fixed-size heaps
+    for chasing neighbors, holding both an index and a distance.
+    When any row of the heap is full, adding an additional point will push
+    the furthest point off the heap.
+
+    Parameters
+    ----------
+    n_pts : int
+        the number of heaps to use
+    n_nbrs : int
+        the size of each heap.
+    """
+    cdef np.ndarray distances_arr
+    cdef np.ndarray indices_arr
+
+    cdef DTYPE_t[:, ::1] distances
+    cdef ITYPE_t[:, ::1] indices
+
+    def __cinit__(self):
+        self.distances_arr = np.zeros((1, 1), dtype=DTYPE, order='C')
+        self.indices_arr = np.zeros((1, 1), dtype=ITYPE, order='C')
+        self.distances = get_memview_DTYPE_2D(self.distances_arr)
+        self.indices = get_memview_ITYPE_2D(self.indices_arr)
+
+    def __init__(self, n_pts, n_nbrs):
+        self.distances_arr = np.full((n_pts, n_nbrs), np.inf, dtype=DTYPE,
+                                     order='C')
+        self.indices_arr = np.zeros((n_pts, n_nbrs), dtype=ITYPE, order='C')
+        self.distances = get_memview_DTYPE_2D(self.distances_arr)
+        self.indices = get_memview_ITYPE_2D(self.indices_arr)
+
+    def get_arrays(self, sort=True):
+        """Get the arrays of distances and indices within the heap.
+
+        If sort=True, then simultaneously sort the indices and distances,
+        so the closer points are listed first.
+        """
+        if sort:
+            self._sort()
+        return self.distances_arr, self.indices_arr
+
+    cdef inline DTYPE_t largest(self, ITYPE_t row) nogil except -1:
+        """Return the largest distance in the given row"""
+        return self.distances[row, 0]
+
+    def push(self, ITYPE_t row, DTYPE_t val, ITYPE_t i_val):
+        return self._push(row, val, i_val)
+
+    cdef int _push(self, ITYPE_t row, DTYPE_t val,
+                   ITYPE_t i_val) nogil except -1:
+        """push (val, i_val) into the given row"""
+        cdef ITYPE_t i, ic1, ic2, i_swap
+        cdef ITYPE_t size = self.distances.shape[1]
+        cdef DTYPE_t* dist_arr = &self.distances[row, 0]
+        cdef ITYPE_t* ind_arr = &self.indices[row, 0]
+
+        # check if val should be in heap
+        if val > dist_arr[0]:
+            return 0
+
+        # insert val at position zero
+        dist_arr[0] = val
+        ind_arr[0] = i_val
+
+        # descend the heap, swapping values until the max heap criterion is met
+        i = 0
+        while True:
+            ic1 = 2 * i + 1
+            ic2 = ic1 + 1
+
+            if ic1 >= size:
+                break
+            elif ic2 >= size:
+                if dist_arr[ic1] > val:
+                    i_swap = ic1
+                else:
+                    break
+            elif dist_arr[ic1] >= dist_arr[ic2]:
+                if val < dist_arr[ic1]:
+                    i_swap = ic1
+                else:
+                    break
+            else:
+                if val < dist_arr[ic2]:
+                    i_swap = ic2
+                else:
+                    break
+
+            dist_arr[i] = dist_arr[i_swap]
+            ind_arr[i] = ind_arr[i_swap]
+
+            i = i_swap
+
+        dist_arr[i] = val
+        ind_arr[i] = i_val
+
+        return 0
+
+    cdef int _sort(self) except -1:
+        """simultaneously sort the distances and indices"""
+        cdef DTYPE_t[:, ::1] distances = self.distances
+        cdef ITYPE_t[:, ::1] indices = self.indices
+        cdef ITYPE_t row
+        for row in range(distances.shape[0]):
+            _simultaneous_sort(&distances[row, 0],
+                               &indices[row, 0],
+                               distances.shape[1])
+        return 0
+
 cdef int _simultaneous_sort(DTYPE_t* dist, ITYPE_t* idx,
                             ITYPE_t size) nogil except -1:
     """
diff --git a/sklearn/neighbors/_trilateration.pyx b/sklearn/neighbors/_trilateration.pyx
index 745955b117263..849c3e1f7bf52 100644
--- a/sklearn/neighbors/_trilateration.pyx
+++ b/sklearn/neighbors/_trilateration.pyx
@@ -64,8 +64,16 @@ cdef class TrilaterationIndex:
     def __cinit__(self):
         self.data_arr = np.empty((1, 1), dtype=DTYPE, order='C')
 
-    def __init__(self, data, ref, metric='euclidean', **kwargs):
+    def __init__(self, data, ref=None, metric='euclidean', **kwargs):
         self.data_arr = check_array(data, dtype=DTYPE, order='C')
+
+        # Ref points can be passed in
+        # Otherwise they are generated based on input data
+        if ref is None:
+            ref = self._create_ref_points()
+            print("generated synthetic reference points:")
+            print(ref)
+
         self.ref_points_arr = check_array(ref, dtype=DTYPE, order='C')
 
         if self.data_arr.shape[1] != self.ref_points_arr.shape[1]:
@@ -102,10 +110,23 @@ cdef class TrilaterationIndex:
         self.idx_array_arr = self.idx_array_arr[r0sortorder]
 
         self._update_memviews()
-        _find_nearest_sorted_2D(self.distances, -2)
-        _find_nearest_sorted_2D(self.distances, 62)
-        _find_nearest_sorted_2D(self.distances, 100)
+        # _find_nearest_sorted_2D(self.distances, -2)
+        # _find_nearest_sorted_2D(self.distances, 62)
+        # _find_nearest_sorted_2D(self.distances, 100)
+
+    def _create_ref_points(self):
+        """
+        Create a set of well distributed reference points
+        in the same number of dimensions as the input data
+
+        For the love of all that is holy, make this better.
+        """
 
+        cdef DTYPE_t MAX_REF = 9999.0
+
+        ndims = self.data_arr.shape[1]
+        refs = [[0]*i+[MAX_REF]*(ndims-i) for i in range(ndims)]
+        return np.asarray(refs)
 
     def _update_memviews(self):
         self.data = get_memview_DTYPE_2D(self.data_arr)
@@ -120,6 +141,31 @@ cdef class TrilaterationIndex:
         query the index for k nearest neighbors
         """
 
+        if k != 1:
+            raise ValueError("Right now this only works for k=1")
+
+        print(f"X.shape: {X.shape}")
+
+        if X.shape[0] == 1:
+            results=self._query_one(X, k, return_distance, sort_results)
+        else:
+            results = [self._query_one(x, k=k,
+                              return_distance=return_distance,
+                              sort_results=sort_results) \
+                       for x in X]
+
+        return results
+
+    def _query_one(self, X, k=5,
+              return_distance=True,
+              sort_results=True):
+        """
+        query the index for k nearest neighbors
+        """
+        if X.shape[0] != 1:
+            raise ValueError("_query takes only a single X point"
+                             "use query for multiple records")
+
         if X.shape[X.ndim - 1] != self.data.shape[1]:
             raise ValueError("query data dimension must "
                              "match training data dimension")
@@ -127,7 +173,9 @@ cdef class TrilaterationIndex:
         if self.data.shape[0] < k:
             raise ValueError("k must be less than or equal "
                              "to the number of training points")
-        
+
+        cdef NeighborsHeap heap = NeighborsHeap(X.shape[0], k)
+
         # Establish the distances from the query point to the reference points
         cdef np.ndarray q_dists
         q_dists = self.dist_metric.pairwise(X, self.ref_points_arr)
@@ -142,9 +190,9 @@ cdef class TrilaterationIndex:
         best_idx = _find_nearest_sorted_2D(self.distances, q_dists[0,0])
         # best_dist = self.dist_metric.dist(self.data[best_idx,:], &Xarr, X.shape[1])
         # print(self.data_arr[best_idx,:].shape)
-        best_dist = cdist([self.data_arr[best_idx,:]], X)
+        best_dist = cdist([self.data_arr[self.idx_array_arr[best_idx],:]], X)
 
-        print("first best guess:")
+        print(f"first best guess: {best_idx} data[{self.idx_array_arr[best_idx]}]")
         print(best_idx)
         print(np.asarray(self.data[best_idx,:]))
 
@@ -164,6 +212,7 @@ cdef class TrilaterationIndex:
 
         while True:
             if low_idx <= low_idx_possible and high_idx >= high_idx_possible:
+                print(f"breaking because {low_idx} <= {low_idx_possible} and {high_idx} >= {high_idx_possible}")
                 break
 
             # Determine whether the next high or low point is a better test:
@@ -184,11 +233,11 @@ cdef class TrilaterationIndex:
                 print("why?")
                 break
 
-            print(f"testing point at: {self.idx_array[test_idx]} {np.asarray(self.data[self.idx_array[test_idx],:])}")
+            print(f"testing point at index {test_idx}: data[{self.idx_array[test_idx]}] {np.asarray(self.data[self.idx_array[test_idx],:])}")
             # Check that all pre-calculated distances are better than best
             sufficient = True
             for d, q in zip(self.distances[test_idx,1:], q_dists[0,1:]):
-                print(f"testing: ({abs(d-q)}) vs {best_dist}")
+                print(f"testing that: {abs(d-q)} < {best_dist}")
                 if abs(d-q) > best_dist:
                     sufficient = False
                     break
@@ -224,9 +273,5 @@ cdef class TrilaterationIndex:
         if sort_results and not return_distance:
             raise ValueError("return_distance must be True "
                              "if sort_results is True")
-        
-
-
-
 
     pass
diff --git a/trilat_test_10.py b/trilat_test_10.py
new file mode 100644
index 0000000000000..422ecb1220f82
--- /dev/null
+++ b/trilat_test_10.py
@@ -0,0 +1,83 @@
+# File to debug
+# This file should never be published...
+# Purge it from git before merging
+
+import csv
+import sklearn
+import numpy as np
+
+from scipy.spatial.distance import cdist
+
+# samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]
+querypoint = [[50, 65]]
+
+samples = []
+refpoints = []
+
+with open('/home/chipmonkey/repos/TrilaterationIndex/data/point_sample_10.csv') as csvfile:
+    reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
+    for row in reader:
+        print(row)
+        samples.append([row['x'], row['y']])
+
+with open('/home/chipmonkey/repos/TrilaterationIndex/data/sample_ref_points.csv') as csvfile:
+    reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
+    for row in reader:
+        print(row)
+        refpoints.append([row['x'], row['y']])
+
+print(refpoints)
+
+print(samples)
+print("Actual distances:")
+print(cdist(samples, querypoint))
+
+from sklearn.neighbors import NearestNeighbors, KDTree
+from sklearn.neighbors import TrilaterationIndex
+
+neigh = NearestNeighbors(radius=10)
+tree = KDTree(samples)
+tq = tree.query(querypoint, k=2)
+print("tree query results:")
+print(tq)
+neigh.fit(samples)
+
+print("finding neighbors within radius:")
+rng = neigh.radius_neighbors([[44., 44.]])
+print("distances:")
+print(np.asarray(rng[0][0]))
+print("indexes:")
+print(list(np.asarray(rng[1][0])))
+print("points:")
+things = [samples[x] for x in list(np.asarray(rng[1][0]))]
+print(things)
+# [samples[x] for x in list(np.asarray(rng[1][0]))]
+# print(samples[list(np.asarray(rng[1][0]))])
+
+print(type(tree))
+print(type(tree.data))
+# dist, ind = tree.query
+print(np.asarray(tree.node_data))  # Ohhh https://stackoverflow.com/questions/20978377/cython-convert-memory-view-to-numpy-array
+print(tree.node_data[0])
+
+ti = TrilaterationIndex(samples, refpoints)
+
+
+
+print(np.asarray(ti.ref_points))
+print(np.asarray(ti.idx_array))
+print("distances:")
+print(np.asarray(ti.distances))
+
+print(ti.ref_points.shape[0])
+print(ti.ref_points.shape[1])
+
+result1 = ti.query(X=np.asarray(querypoint), k=1)
+print(result1)
+
+
+
+t2 = TrilaterationIndex(samples)
+result2 = t2.query(X=np.asarray(querypoint), k=1)
+print(result2)
+print("This should be 14.30117 something")

From e9f18bde21fdfb07b17e77a83f58d5c5f8001163 Mon Sep 17 00:00:00 2001
From: Chip Lynch <chip@chiplynch.com>
Date: Thu, 11 Feb 2021 14:59:44 -0800
Subject: [PATCH 05/19] Added working heap and k-nn (beyond 1-nn) support

---
 sklearn/neighbors/_helpers.pxi       |  9 +++++
 sklearn/neighbors/_trilateration.pyx | 56 +++++++++++++++++-----------
 trilat_test_10.py                    |  9 ++++-
 3 files changed, 52 insertions(+), 22 deletions(-)

diff --git a/sklearn/neighbors/_helpers.pxi b/sklearn/neighbors/_helpers.pxi
index 2306f573a9afd..07d187eaf4e1f 100644
--- a/sklearn/neighbors/_helpers.pxi
+++ b/sklearn/neighbors/_helpers.pxi
@@ -70,6 +70,11 @@ cdef class NeighborsHeap:
         self.distances = get_memview_DTYPE_2D(self.distances_arr)
         self.indices = get_memview_ITYPE_2D(self.indices_arr)
 
+    def get_max(self, row):
+        """Get the max distance and index from the heap for a given row
+        """
+        return self.largest(row), self.largest_idx(row)
+
     def get_arrays(self, sort=True):
         """Get the arrays of distances and indices within the heap.
 
@@ -83,6 +88,10 @@ cdef class NeighborsHeap:
     cdef inline DTYPE_t largest(self, ITYPE_t row) nogil except -1:
         """Return the largest distance in the given row"""
         return self.distances[row, 0]
+    
+    cdef inline ITYPE_t largest_idx(self, ITYPE_t row) nogil except -1:
+        """Return the index for the largest distance in the given row"""
+        return self.indices[row, 0]
 
     def push(self, ITYPE_t row, DTYPE_t val, ITYPE_t i_val):
         return self._push(row, val, i_val)
diff --git a/sklearn/neighbors/_trilateration.pyx b/sklearn/neighbors/_trilateration.pyx
index 849c3e1f7bf52..0bc83ef131eb2 100644
--- a/sklearn/neighbors/_trilateration.pyx
+++ b/sklearn/neighbors/_trilateration.pyx
@@ -141,11 +141,6 @@ cdef class TrilaterationIndex:
         query the index for k nearest neighbors
         """
 
-        if k != 1:
-            raise ValueError("Right now this only works for k=1")
-
-        print(f"X.shape: {X.shape}")
-
         if X.shape[0] == 1:
             results=self._query_one(X, k, return_distance, sort_results)
         else:
@@ -174,7 +169,10 @@ cdef class TrilaterationIndex:
             raise ValueError("k must be less than or equal "
                              "to the number of training points")
 
+        # Can probably improve this - using a heap that allows more than 1 value
+        # but we're always only using one here (X.shape[0] must be 1 from above)
         cdef NeighborsHeap heap = NeighborsHeap(X.shape[0], k)
+        print(f"initialized heap: {heap.get_arrays(sort=False)}")
 
         # Establish the distances from the query point to the reference points
         cdef np.ndarray q_dists
@@ -191,20 +189,33 @@ cdef class TrilaterationIndex:
         # best_dist = self.dist_metric.dist(self.data[best_idx,:], &Xarr, X.shape[1])
         # print(self.data_arr[best_idx,:].shape)
         best_dist = cdist([self.data_arr[self.idx_array_arr[best_idx],:]], X)
+        # heap.push(0, best_dist, best_idx)
+        # print(f"max heap: {heap.get_max(0)}")
+        # print(f"largest: {heap.largest(0)}")
 
         print(f"first best guess: {best_idx} data[{self.idx_array_arr[best_idx]}]")
         print(best_idx)
         print(np.asarray(self.data[best_idx,:]))
 
-        print("starting best distance")
-        print(best_dist)
+        # Populate the heap using 2k elements; k on each side of our first guess:
+        low_idx = max(best_idx - k, 0)
+        high_idx = min(best_idx + k, self.distances.shape[0])
+        print(f"low_idx_possible: {low_idx_possible}; high_idx_possible: {high_idx_possible} with low_idx: {low_idx}, high_idx: {high_idx}")
+        for i in range(low_idx, high_idx + 1):
+            if i < self.distances.shape[0]:
+                test_dist = cdist([self.data_arr[self.idx_array_arr[i],:]], X)
+                heap.push(0, test_dist, self.idx_array_arr[i])
+
+        print("starting best distance heap")
+        print(f"{heap.get_arrays(sort=False)}")
 
         # Establish bounds between which to search
-        low_idx_possible = _find_nearest_sorted_2D(self.distances, q_dists[0, 0] - best_dist)
-        high_idx_possible = _find_nearest_sorted_2D(self.distances, q_dists[0, 0] + best_dist)
-        low_idx = max(best_idx - 1, 0)
-        high_idx = min(best_idx + 1, self.distances.shape[0])
-        print(f"low_idx_possible: {low_idx_possible}; high_idx_possible: {high_idx_possible}")
+        if heap.largest(0) != np.inf:
+            low_idx_possible = 0
+            high_idx_possible = self.data_arr.shape[0]
+        else:
+            low_idx_possible = _find_nearest_sorted_2D(self.distances, q_dists[0, 0] - heap.largest(0))
+            high_idx_possible = _find_nearest_sorted_2D(self.distances, q_dists[0, 0] + heap.largest(0))
 
         # Consider adding chunking here
         # So we test more than one new point at a time
@@ -215,6 +226,8 @@ cdef class TrilaterationIndex:
                 print(f"breaking because {low_idx} <= {low_idx_possible} and {high_idx} >= {high_idx_possible}")
                 break
 
+            print(f"heap.largest(0) = {heap.largest(0)}")
+
             # Determine whether the next high or low point is a better test:
             lowdelta = fabs(self.distances[low_idx, 0] - q_dists[0, 0])
             highdelta = fabs(self.distances[high_idx, 0] - q_dists[0, 0])
@@ -236,26 +249,27 @@ cdef class TrilaterationIndex:
             print(f"testing point at index {test_idx}: data[{self.idx_array[test_idx]}] {np.asarray(self.data[self.idx_array[test_idx],:])}")
             # Check that all pre-calculated distances are better than best
             sufficient = True
+
             for d, q in zip(self.distances[test_idx,1:], q_dists[0,1:]):
-                print(f"testing that: {abs(d-q)} < {best_dist}")
-                if abs(d-q) > best_dist:
+                print(f"testing that: {abs(d-q)} < {heap.largest(0)}")
+                if abs(d-q) > heap.largest(0):
                     sufficient = False
                     break
             
             if sufficient:
                 test_dist = cdist([self.data_arr[self.idx_array[test_idx],:]], X)
                 print(f"{test_idx} is sufficient... test_dist is: {test_dist}")
-                if test_dist < best_dist:
-                    print("replacing best with test...")
-                    print(f"new best idx: {best_idx} ({self.idx_array[best_idx]}) with dist: {test_dist}")
+                if test_dist < heap.largest(0):
+                    heap.push(0, test_dist, self.idx_array[test_idx])
+                    print(f"pushing idx: {best_idx} ({self.idx_array[best_idx]}) with dist: {test_dist}")
                     best_idx = test_idx
-                    best_dist = test_dist
-                    low_idx_possible = _find_nearest_sorted_2D(self.distances, q_dists[0, 0] - best_dist)
-                    high_idx_possible = _find_nearest_sorted_2D(self.distances, q_dists[0, 0] + best_dist)
+                    low_idx_possible = _find_nearest_sorted_2D(self.distances, q_dists[0, 0] - heap.largest(0))
+                    high_idx_possible = _find_nearest_sorted_2D(self.distances, q_dists[0, 0] + heap.largest(0))
 
             continue
 
-        return (self.idx_array[best_idx], best_dist)
+        # return (self.idx_array[best_idx], best_dist)
+        return heap.get_arrays()
 
 
 
diff --git a/trilat_test_10.py b/trilat_test_10.py
index 422ecb1220f82..c69c7740f90f5 100644
--- a/trilat_test_10.py
+++ b/trilat_test_10.py
@@ -35,11 +35,12 @@
 from sklearn.neighbors import NearestNeighbors, KDTree
 from sklearn.neighbors import TrilaterationIndex
 
-neigh = NearestNeighbors(radius=10)
 tree = KDTree(samples)
 tq = tree.query(querypoint, k=2)
 print("tree query results:")
 print(tq)
+
+neigh = NearestNeighbors(radius=10)
 neigh.fit(samples)
 
 print("finding neighbors within radius:")
@@ -81,3 +82,9 @@
 result2 = t2.query(X=np.asarray(querypoint), k=1)
 print(result2)
 print("This should be 14.30117 something")
+
+
+t2 = TrilaterationIndex(samples)
+result2 = t2.query(X=np.asarray(querypoint), k=3)
+print(result2)
+print("This should be 14.30117 something")
\ No newline at end of file

From 605e52b0a6f08e6f6de1863ab8937184218bc76d Mon Sep 17 00:00:00 2001
From: Chip Lynch <chip@chiplynch.com>
Date: Thu, 11 Feb 2021 16:02:28 -0800
Subject: [PATCH 06/19] Remove evil print statements

---
 sklearn/neighbors/_trilateration.pyx | 40 +++++++++++++++-------------
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/sklearn/neighbors/_trilateration.pyx b/sklearn/neighbors/_trilateration.pyx
index 0bc83ef131eb2..428a69258603a 100644
--- a/sklearn/neighbors/_trilateration.pyx
+++ b/sklearn/neighbors/_trilateration.pyx
@@ -71,8 +71,8 @@ cdef class TrilaterationIndex:
         # Otherwise they are generated based on input data
         if ref is None:
             ref = self._create_ref_points()
-            print("generated synthetic reference points:")
-            print(ref)
+            # print("generated synthetic reference points:")
+            # print(ref)
 
         self.ref_points_arr = check_array(ref, dtype=DTYPE, order='C')
 
@@ -141,7 +141,9 @@ cdef class TrilaterationIndex:
         query the index for k nearest neighbors
         """
 
-        if X.shape[0] == 1:
+        if isinstance(X, list):
+            results = self._query_one(np.asarray(X), k, return_distance, sort_results)
+        elif X.shape[0] == 1:
             results=self._query_one(X, k, return_distance, sort_results)
         else:
             results = [self._query_one(x, k=k,
@@ -151,7 +153,7 @@ cdef class TrilaterationIndex:
 
         return results
 
-    def _query_one(self, X, k=5,
+    cdef _query_one(self, X, k=5,
               return_distance=True,
               sort_results=True):
         """
@@ -172,7 +174,7 @@ cdef class TrilaterationIndex:
         # Can probably improve this - using a heap that allows more than 1 value
         # but we're always only using one here (X.shape[0] must be 1 from above)
         cdef NeighborsHeap heap = NeighborsHeap(X.shape[0], k)
-        print(f"initialized heap: {heap.get_arrays(sort=False)}")
+        # print(f"initialized heap: {heap.get_arrays(sort=False)}")
 
         # Establish the distances from the query point to the reference points
         cdef np.ndarray q_dists
@@ -193,21 +195,21 @@ cdef class TrilaterationIndex:
         # print(f"max heap: {heap.get_max(0)}")
         # print(f"largest: {heap.largest(0)}")
 
-        print(f"first best guess: {best_idx} data[{self.idx_array_arr[best_idx]}]")
-        print(best_idx)
-        print(np.asarray(self.data[best_idx,:]))
+        # print(f"first best guess: {best_idx} data[{self.idx_array_arr[best_idx]}]")
+        # print(best_idx)
+        # print(np.asarray(self.data[best_idx,:]))
 
         # Populate the heap using 2k elements; k on each side of our first guess:
         low_idx = max(best_idx - k, 0)
         high_idx = min(best_idx + k, self.distances.shape[0])
-        print(f"low_idx_possible: {low_idx_possible}; high_idx_possible: {high_idx_possible} with low_idx: {low_idx}, high_idx: {high_idx}")
+        # print(f"low_idx_possible: {low_idx_possible}; high_idx_possible: {high_idx_possible} with low_idx: {low_idx}, high_idx: {high_idx}")
         for i in range(low_idx, high_idx + 1):
             if i < self.distances.shape[0]:
                 test_dist = cdist([self.data_arr[self.idx_array_arr[i],:]], X)
                 heap.push(0, test_dist, self.idx_array_arr[i])
 
-        print("starting best distance heap")
-        print(f"{heap.get_arrays(sort=False)}")
+        # print("starting best distance heap")
+        # print(f"{heap.get_arrays(sort=False)}")
 
         # Establish bounds between which to search
         if heap.largest(0) != np.inf:
@@ -223,16 +225,16 @@ cdef class TrilaterationIndex:
 
         while True:
             if low_idx <= low_idx_possible and high_idx >= high_idx_possible:
-                print(f"breaking because {low_idx} <= {low_idx_possible} and {high_idx} >= {high_idx_possible}")
+                # print(f"breaking because {low_idx} <= {low_idx_possible} and {high_idx} >= {high_idx_possible}")
                 break
 
-            print(f"heap.largest(0) = {heap.largest(0)}")
+            # print(f"heap.largest(0) = {heap.largest(0)}")
 
             # Determine whether the next high or low point is a better test:
             lowdelta = fabs(self.distances[low_idx, 0] - q_dists[0, 0])
             highdelta = fabs(self.distances[high_idx, 0] - q_dists[0, 0])
-            print(f"comparing: {low_idx}, {high_idx}")
-            print(f"lowdelta: {lowdelta}, highdelta: {highdelta}")
+            # print(f"comparing: {low_idx}, {high_idx}")
+            # print(f"lowdelta: {lowdelta}, highdelta: {highdelta}")
             if lowdelta <= highdelta and low_idx >= low_idx_possible:
                 test_idx = low_idx
                 low_idx = low_idx - 1
@@ -246,22 +248,22 @@ cdef class TrilaterationIndex:
                 print("why?")
                 break
 
-            print(f"testing point at index {test_idx}: data[{self.idx_array[test_idx]}] {np.asarray(self.data[self.idx_array[test_idx],:])}")
+            # print(f"testing point at index {test_idx}: data[{self.idx_array[test_idx]}] {np.asarray(self.data[self.idx_array[test_idx],:])}")
             # Check that all pre-calculated distances are better than best
             sufficient = True
 
             for d, q in zip(self.distances[test_idx,1:], q_dists[0,1:]):
-                print(f"testing that: {abs(d-q)} < {heap.largest(0)}")
+                # print(f"testing that: {abs(d-q)} < {heap.largest(0)}")
                 if abs(d-q) > heap.largest(0):
                     sufficient = False
                     break
             
             if sufficient:
                 test_dist = cdist([self.data_arr[self.idx_array[test_idx],:]], X)
-                print(f"{test_idx} is sufficient... test_dist is: {test_dist}")
+                # print(f"{test_idx} is sufficient... test_dist is: {test_dist}")
                 if test_dist < heap.largest(0):
                     heap.push(0, test_dist, self.idx_array[test_idx])
-                    print(f"pushing idx: {best_idx} ({self.idx_array[best_idx]}) with dist: {test_dist}")
+                    # print(f"pushing idx: {best_idx} ({self.idx_array[best_idx]}) with dist: {test_dist}")
                     best_idx = test_idx
                     low_idx_possible = _find_nearest_sorted_2D(self.distances, q_dists[0, 0] - heap.largest(0))
                     high_idx_possible = _find_nearest_sorted_2D(self.distances, q_dists[0, 0] + heap.largest(0))

From 7a1973233bd8fc1c791e9aeb2b3f89c00b9725d9 Mon Sep 17 00:00:00 2001
From: Chip Lynch <chip@chiplynch.com>
Date: Fri, 12 Feb 2021 14:39:07 -0800
Subject: [PATCH 07/19] WIP commit - adding query_radius

---
 sklearn/neighbors/_helpers.pxi       |   9 ++
 sklearn/neighbors/_trilateration.pyx | 118 +++++++++++++++++++++++++--
 2 files changed, 118 insertions(+), 9 deletions(-)

diff --git a/sklearn/neighbors/_helpers.pxi b/sklearn/neighbors/_helpers.pxi
index 07d187eaf4e1f..4ecc6569845ba 100644
--- a/sklearn/neighbors/_helpers.pxi
+++ b/sklearn/neighbors/_helpers.pxi
@@ -35,6 +35,15 @@ cdef int _find_nearest_sorted_2D(DTYPE_t[:,:] rdists, DTYPE_t target):
         idx = idx - 1
     return idx
 
+cdef int _find_nearest_sorted_1D(DTYPE_t[:] rdists, DTYPE_t target):
+    """ rdists must be sorted by its first column.
+    Can we do this faster with Memoryviews?  See _simultaneous_sort
+    """
+    cdef int idx
+    idx = np.searchsorted(rdists[:], target, side="left")
+    if idx > 0 and (idx == len(rdists) or fabs(target - rdists[idx-1]) < fabs(target - rdists[idx])):
+        idx = idx - 1
+    return idx
 
 cdef class NeighborsHeap:
     """A max-heap structure to keep track of distances/indices of neighbors
diff --git a/sklearn/neighbors/_trilateration.pyx b/sklearn/neighbors/_trilateration.pyx
index 428a69258603a..b73d31e4553d8 100644
--- a/sklearn/neighbors/_trilateration.pyx
+++ b/sklearn/neighbors/_trilateration.pyx
@@ -53,6 +53,11 @@ cdef class TrilaterationIndex:
     cdef np.ndarray idx_array_arr
     cdef np.ndarray r0sortorder
     cdef np.ndarray ref_dists
+    cdef np.ndarray r_indexes
+    cdef np.ndarray r_distances
+
+    cdef readonly ITYPE_t[:, ::1] r_indexes_mv
+    cdef readonly DTYPE_t[:, ::1] r_distances_mv
 
     cdef readonly DTYPE_t[:, ::1] data
     cdef readonly DTYPE_t[:, ::1] ref_points
@@ -109,10 +114,17 @@ cdef class TrilaterationIndex:
         self.ref_dists = self.ref_dists[r0sortorder]
         self.idx_array_arr = self.idx_array_arr[r0sortorder]
 
-        self._update_memviews()
-        # _find_nearest_sorted_2D(self.distances, -2)
-        # _find_nearest_sorted_2D(self.distances, 62)
-        # _find_nearest_sorted_2D(self.distances, 100)
+        # Build ADDITIONAL indexes each sorted by the specific ref dists
+        # Note that self.distances/self.ref_dists are ONLY sorted by r0 dist
+        self.distances = get_memview_DTYPE_2D(self.ref_dists)
+        self.data = get_memview_DTYPE_2D(self.data_arr)
+        self.idx_array = get_memview_ITYPE_1D(self.idx_array_arr)
+        self.ref_points = get_memview_DTYPE_2D(self.ref_points_arr)
+
+        self._build_r_indexes()
+        self.r_indexes_mv = get_memview_ITYPE_2D(self.r_indexes)
+        self.r_distances_mv = get_memview_DTYPE_2D(self.r_distances)
+
 
     def _create_ref_points(self):
         """
@@ -128,11 +140,28 @@ cdef class TrilaterationIndex:
         refs = [[0]*i+[MAX_REF]*(ndims-i) for i in range(ndims)]
         return np.asarray(refs)
 
-    def _update_memviews(self):
-        self.data = get_memview_DTYPE_2D(self.data_arr)
-        self.idx_array = get_memview_ITYPE_1D(self.idx_array_arr)
-        self.ref_points = get_memview_DTYPE_2D(self.ref_points_arr)
-        self.distances = get_memview_DTYPE_2D(self.ref_dists)
+
+    cdef _build_r_indexes(self):
+        """
+        Build a 2D array
+        """
+        self.r_indexes = np.zeros((self.distances.shape[1], self.distances.shape[0]), dtype=ITYPE, order='C')
+        self.r_distances = np.zeros((self.distances.shape[1], self.distances.shape[0]), dtype=DTYPE, order='C')
+
+        for i in range(self.distances.shape[1]):
+            print(type(self.idx_array), self.idx_array.shape)
+            print(self.idx_array)
+            print(type(self.distances), self.distances.shape)
+            self.r_indexes[i,] = self.idx_array
+            self.r_distances[i,] = self.distances[:,i]
+            if i > 0:  # At this moment, the first column is already sorted
+                sortorder = np.argsort(self.r_distances[i,])
+                self.r_indexes[i,] = self.r_indexes[i,sortorder]
+                self.r_distances[i,] = self.r_distances[i, sortorder]
+
+        print(self.r_indexes)
+        print(self.r_distances)
+        return 0
 
     def query(self, X, k=5,
               return_distance=True,
@@ -273,6 +302,49 @@ cdef class TrilaterationIndex:
         # return (self.idx_array[best_idx], best_dist)
         return heap.get_arrays()
 
+    cdef _query_expand(self, X, k=5,
+              start_dist = 0.01,
+              return_distance=True,
+              sort_results=True):
+        """
+        return k-nn by the expanding method...
+        starting with start_dist, iterate over query_radius
+        while expanding and contracting the radius to
+        arrive at a good result
+        """
+
+        cdef DTYPE_t radius, too_high, too_low, new_radius, STD_SCALE, MAX_FUDGE
+        cdef ITYPE_t approx_count, i, MAX_ITER
+
+        MAX_ITER = 20
+        MAX_FUDGE = 5.0
+        STD_SCALE = 2.0
+
+        radius = start_dist
+        too_low = 0
+        too_high = np.inf
+        approx_count = self.query_radius_approx(X, radius)
+
+        for i in range(MAX_ITER):
+            if approx_count == k:
+                break
+            elif approx_count < k:
+                too_low = radius
+                new_radius = start_dist * STD_SCALE
+                if new_radius > too_high:
+                    new_radius = (too_high + radius) / STD_SCALE
+                radius = new_radius
+                approx_count = self.query_radius_approx(X, radius)
+                continue
+            elif approx_count > k * MAX_FUDGE:
+                radius = (radius + too_low) / STD_SCALE
+                approx_count = self.query_radius_approx(X, radius)
+            else:
+                continue
+        
+        return too_low, too_high, approx_count
+        
+
 
 
     def query_radius(self, X, r,
@@ -291,3 +363,31 @@ cdef class TrilaterationIndex:
                              "if sort_results is True")
 
     pass
+
+    def query_radius_approx(self, X, r):
+        """
+        query the index for neighbors within a radius r
+        return approximate results only (by relying on index distances)
+        """
+        result = self._query_radius_approx(X, r)
+        return result
+
+    cdef _query_radius_approx(self, X, r):
+
+        # The main self.idx_array and self.distances sort ONLY by the first
+        # reference distance.  Here we sort by all three.
+        # Tune this some day...
+
+        cdef np.ndarray points_in_range = np.asarray([0])
+        cdef np.ndarray q_dists
+        q_dists = self.dist_metric.pairwise(X, self.ref_points_arr)
+
+        for i in range(self.r_indexes_mv.shape[1]):
+            low_idx = _find_nearest_sorted_1D(self.r_distances_mv[i, :], q_dists[0, 0] - r)
+            high_idx = _find_nearest_sorted_1D(self.r_distances_mv[i, :], q_dists[0, 0] + r)
+            
+            print(i)
+
+
+        return points_in_range
+

From 6daca29e7f44042e25889d5c2fa5f15aa3a9ec5d Mon Sep 17 00:00:00 2001
From: Chip Lynch <chip@chiplynch.com>
Date: Fri, 12 Feb 2021 16:46:29 -0800
Subject: [PATCH 08/19] query_radius_approx is working, woot

---
 sklearn/neighbors/_trilateration.pyx | 31 ++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/sklearn/neighbors/_trilateration.pyx b/sklearn/neighbors/_trilateration.pyx
index b73d31e4553d8..f8affa813137f 100644
--- a/sklearn/neighbors/_trilateration.pyx
+++ b/sklearn/neighbors/_trilateration.pyx
@@ -21,6 +21,8 @@
 
 cimport numpy as np
 import numpy as np
+
+from functools import reduce
 from libc.math cimport fabs
 
 from scipy.spatial.distance import cdist
@@ -378,16 +380,27 @@ cdef class TrilaterationIndex:
         # reference distance.  Here we sort by all three.
         # Tune this some day...
 
-        cdef np.ndarray points_in_range = np.asarray([0])
+        # cdef np.ndarray points_in_range = np.arange(self.r_indexes_mv.shape[0]).reshape(self.r_indexes_mv.shape[0])
         cdef np.ndarray q_dists
-        q_dists = self.dist_metric.pairwise(X, self.ref_points_arr)
-
-        for i in range(self.r_indexes_mv.shape[1]):
-            low_idx = _find_nearest_sorted_1D(self.r_distances_mv[i, :], q_dists[0, 0] - r)
-            high_idx = _find_nearest_sorted_1D(self.r_distances_mv[i, :], q_dists[0, 0] + r)
-            
-            print(i)
 
+        points_in_range = []
 
-        return points_in_range
+        q_dists = self.dist_metric.pairwise(X, self.ref_points_arr)
+        print(f"q_dists: {q_dists[0]} - {q_dists[0, 1]}")
+        print(f"r.indexes.shape: {self.r_indexes_mv.shape}")
+
+        for i in range(self.r_indexes_mv.shape[0]):
+            # low_idx = _find_nearest_sorted_1D(self.r_distances_mv[i, :], q_dists[0, i] - r)
+            low_idx = np.searchsorted(self.r_distances_mv[i, :], q_dists[0, i] - r, side="left")
+            # high_idx = _find_nearest_sorted_1D(self.r_distances_mv[i, :], q_dists[0, i] + r)
+            high_idx = np.searchsorted(self.r_distances_mv[i, :], q_dists[0, i] + r, side="right")
+
+            print(f"low_idx: {low_idx}, high_idx: {high_idx}")
+            print(f"valid IDS: {self.r_indexes[i, low_idx:high_idx]}")
+            points_in_range.append(self.r_indexes[i, low_idx:high_idx])
+            print(i)
+        
+        common_idx = reduce(np.intersect1d, points_in_range)
+        print(f"commonIDs: {common_idx}")
+        return common_idx
 

From c6dd6c63dbe8919e8ff9d5e423ca1af238d429f9 Mon Sep 17 00:00:00 2001
From: Chip Lynch <chip@chiplynch.com>
Date: Fri, 12 Feb 2021 17:58:07 -0800
Subject: [PATCH 09/19] Query expand is working!

---
 debug_query_radius.py                | 86 ++++++++++++++++++++++++++++
 sklearn/neighbors/_trilateration.pyx | 76 ++++++++++++++++++------
 2 files changed, 146 insertions(+), 16 deletions(-)
 create mode 100644 debug_query_radius.py

diff --git a/debug_query_radius.py b/debug_query_radius.py
new file mode 100644
index 0000000000000..c17953f73a615
--- /dev/null
+++ b/debug_query_radius.py
@@ -0,0 +1,86 @@
+# File to debug
+# This file should never be published...
+# Purge it from git before merging
+
+import csv
+import sklearn
+import numpy as np
+
+from scipy.spatial.distance import cdist
+
+# samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]
+querypoint = [[50, 65]]
+
+samples = []
+refpoints = []
+
+with open('/home/chipmonkey/repos/TrilaterationIndex/data/point_sample_10.csv') as csvfile:
+    reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
+    for row in reader:
+        print(row)
+        samples.append([row['x'], row['y']])
+
+with open('/home/chipmonkey/repos/TrilaterationIndex/data/sample_ref_points.csv') as csvfile:
+    reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
+    for row in reader:
+        print(row)
+        refpoints.append([row['x'], row['y']])
+
+print(refpoints)
+
+print(samples)
+print("Actual distances:")
+print(cdist(samples, querypoint))
+
+from sklearn.neighbors import NearestNeighbors, KDTree
+from sklearn.neighbors import TrilaterationIndex
+
+tree = KDTree(samples)
+tq = tree.query_radius(querypoint, r=20)
+print("tree query results:")
+print(tq)
+
+neigh = NearestNeighbors(radius=10)
+neigh.fit(samples)
+
+print("finding neighbors within radius:")
+rng = neigh.radius_neighbors([[44., 44.]])
+print("distances:")
+print(np.asarray(rng[0][0]))
+print("indexes:")
+print(list(np.asarray(rng[1][0])))
+print("points:")
+things = [samples[x] for x in list(np.asarray(rng[1][0]))]
+print(things)
+# [samples[x] for x in list(np.asarray(rng[1][0]))]
+# print(samples[list(np.asarray(rng[1][0]))])
+
+print(type(tree))
+print(type(tree.data))
+# dist, ind = tree.query
+print(np.asarray(tree.node_data))  # Ohhh https://stackoverflow.com/questions/20978377/cython-convert-memory-view-to-numpy-array
+print(tree.node_data[0])
+
+ti = TrilaterationIndex(samples, refpoints)
+
+
+
+print(np.asarray(ti.ref_points))
+print(np.asarray(ti.idx_array))
+print("distances:")
+print(np.asarray(ti.distances))
+
+print(ti.ref_points.shape[0])
+print(ti.ref_points.shape[1])
+
+result1 = ti.query_radius_approx(np.asarray(querypoint), 20)
+print(f"query_radius_approx: {result1}")
+
+
+result2 = ti.query_radius_approx(np.asarray(querypoint), 20)
+print(f"query_radius: {result2}")
+
+print("------------")
+
+r3 = ti.query_expand(np.asarray(querypoint), 3)
+print(f"query_expand: {r3}")
\ No newline at end of file
diff --git a/sklearn/neighbors/_trilateration.pyx b/sklearn/neighbors/_trilateration.pyx
index f8affa813137f..524293aa51399 100644
--- a/sklearn/neighbors/_trilateration.pyx
+++ b/sklearn/neighbors/_trilateration.pyx
@@ -23,7 +23,7 @@ cimport numpy as np
 import numpy as np
 
 from functools import reduce
-from libc.math cimport fabs
+from libc.math cimport fabs, ceil
 
 from scipy.spatial.distance import cdist
 
@@ -304,49 +304,79 @@ cdef class TrilaterationIndex:
         # return (self.idx_array[best_idx], best_dist)
         return heap.get_arrays()
 
-    cdef _query_expand(self, X, k=5,
-              start_dist = 0.01,
-              return_distance=True,
-              sort_results=True):
+    cdef _get_start_dist(self, X, k):
+        """
+        For the expanding radius approach, we must make an initial guess
+        for a radius that may contain exactly k members.
+        We do this by selecting the R0 reference distances, and simply
+        expanding to k items, and returning the distance which separates
+        the k points along that sole axis
+        """
+        cdef np.ndarray q_dists
+        cdef ITYPE_t low_idx, high_idx
+        q_dists = self.dist_metric.pairwise(X, self.ref_points_arr)
+        print(f"q_dists: {q_dists}")
+        
+        ground_idx = _find_nearest_sorted_2D(self.distances, q_dists[0, 0])
+        # ground_idx = np.searchsorted(self.distances, q_dists[0, 0], side="left")
+        low_idx = ground_idx - (k//2)
+        high_idx = ground_idx + ceil(k/2)
+
+        print(f"get_start_dist is {self.distances[high_idx, 0] - self.distances[low_idx, 0]}")
+        return(self.distances[high_idx, 0] - self.distances[low_idx, 0])
+
+    def query_expand(self, X, k, return_distance=True, sort_results=True):
+        return np.asarray(self._query_expand(X, k, return_distance, sort_results))
+
+    cdef _query_expand(self, X, k,
+              return_distance,
+              sort_results):
         """
         return k-nn by the expanding method...
-        starting with start_dist, iterate over query_radius
+        select a starting radius, iterate over query_radius
         while expanding and contracting the radius to
         arrive at a good result
         """
 
         cdef DTYPE_t radius, too_high, too_low, new_radius, STD_SCALE, MAX_FUDGE
         cdef ITYPE_t approx_count, i, MAX_ITER
+        cdef ITYPE_t[::1] approx_array
+
 
         MAX_ITER = 20
-        MAX_FUDGE = 5.0
+        MAX_FUDGE = 1.0  # MAX_ITER should handle things...
         STD_SCALE = 2.0
 
-        radius = start_dist
+        radius = self._get_start_dist(X, k)
+
         too_low = 0
         too_high = np.inf
-        approx_count = self.query_radius_approx(X, radius)
+        print(f"testing query_radius_approx with: {X}, {radius}")
+        approx_array = self._query_radius_approx(X, radius)
+        approx_count = approx_array.shape[0]
 
         for i in range(MAX_ITER):
             if approx_count == k:
                 break
             elif approx_count < k:
+                print(f"{approx_count} is too few at radius {radius}")
                 too_low = radius
-                new_radius = start_dist * STD_SCALE
+                new_radius = radius * STD_SCALE
                 if new_radius > too_high:
                     new_radius = (too_high + radius) / STD_SCALE
                 radius = new_radius
-                approx_count = self.query_radius_approx(X, radius)
+                approx_array = self._query_radius_approx(X, radius)
+                approx_count = approx_array.shape[0]
                 continue
             elif approx_count > k * MAX_FUDGE:
+                print(f"{approx_count} is too many at radius {radius}")
                 radius = (radius + too_low) / STD_SCALE
-                approx_count = self.query_radius_approx(X, radius)
+                approx_array = self._query_radius_approx(X, radius)
+                approx_count = approx_array.shape[0]
             else:
                 continue
         
-        return too_low, too_high, approx_count
-        
-
+        return approx_array
 
 
     def query_radius(self, X, r,
@@ -364,7 +394,21 @@ cdef class TrilaterationIndex:
             raise ValueError("return_distance must be True "
                              "if sort_results is True")
 
-    pass
+        cdef ITYPE_t[::1] idx_within_r
+
+        if isinstance(X, list):
+            idx_witin_r = self._query_radius_approx([X], r)
+        elif X.shape[0] == 1:
+            idx_witin_r = self._query_radius_approx(X, r)
+        else:
+            idx_witin_r = [self._query_radius_approx(x, r) for x in X]
+
+        if return_distance:
+            dists_within_r = self.dist_metric.pairwise(self.data_arr[idx_within_r], X)
+            return idx_witin_r, dists_within_r
+
+        return idx_witin_r
+
 
     def query_radius_approx(self, X, r):
         """

From 3ec876ac7c80bcb676f8ad9eda8db60d3a637ff6 Mon Sep 17 00:00:00 2001
From: Chip Lynch <chip@chiplynch.com>
Date: Sat, 13 Feb 2021 16:02:23 -0800
Subject: [PATCH 10/19] query and query_expand work with ann-benchmarks

---
 debug_query_radius.py                |  41 ++++++++-
 sklearn/neighbors/_trilateration.pyx | 133 +++++++++++++++++----------
 2 files changed, 122 insertions(+), 52 deletions(-)

diff --git a/debug_query_radius.py b/debug_query_radius.py
index c17953f73a615..5b9ea31be65c4 100644
--- a/debug_query_radius.py
+++ b/debug_query_radius.py
@@ -80,7 +80,44 @@
 result2 = ti.query_radius_approx(np.asarray(querypoint), 20)
 print(f"query_radius: {result2}")
 
+rq = ti.query_radius(np.asarray(querypoint), 20)
+print(f"rq: {rq}")
+
+print("------------")
+print("single point tests")
+r4 = ti.query(querypoint, 3)
+ids, dists = r4
+print(f"query: {r4}")
+print(f"indexes: {ids}, dists: {dists}")
+
+r3 = ti.query_expand(querypoint, 3)
+ids, dists = r3
+print(f"query_expand: {r3}")
+print(f"indexes: {np.asarray(ids)}", dists)
+
+print("Passed Single Point tests")
 print("------------")
+print("Starting multi point tests")
+
+querypoints = np.asarray([np.asarray([50, 65]),
+               np.asarray([55, 70]),
+               np.asarray([45, 80])])
+
+r4 = ti.query(querypoints, 3)
+print(f"query results r4: {r4}")
+print(f"r4.shape: {np.asarray(r4).shape}")
+
+ids, dists = r4
+print(f"indexes: {ids}, dists: {dists}")
+
+r3 = ti.query_expand(querypoints, 3)
+ids, dists = r3
+print(f"query_expand: {r3}")
+print(f"indexes: {np.asarray(ids)}", dists)
+
+# [(array([[14.30117536, 15.82017495, 16.12663017]]), array([[0, 5, 3]])),
+#  (array([[16.08773716, 16.85576594, 16.85576594]]), array([[5, 0, 0]])),
+#  (array([[ 4.37084771, 23.74124599, 23.74124599]]), array([[5, 3, 3]]))]
 
-r3 = ti.query_expand(np.asarray(querypoint), 3)
-print(f"query_expand: {r3}")
\ No newline at end of file
+# cython sklearn/neighbors/_trilateration.pyx -a
+# cython sklearn/neighbors/_kd_tree.pyx -a
diff --git a/sklearn/neighbors/_trilateration.pyx b/sklearn/neighbors/_trilateration.pyx
index 524293aa51399..17aa3ddae60a8 100644
--- a/sklearn/neighbors/_trilateration.pyx
+++ b/sklearn/neighbors/_trilateration.pyx
@@ -151,9 +151,6 @@ cdef class TrilaterationIndex:
         self.r_distances = np.zeros((self.distances.shape[1], self.distances.shape[0]), dtype=DTYPE, order='C')
 
         for i in range(self.distances.shape[1]):
-            print(type(self.idx_array), self.idx_array.shape)
-            print(self.idx_array)
-            print(type(self.distances), self.distances.shape)
             self.r_indexes[i,] = self.idx_array
             self.r_distances[i,] = self.distances[:,i]
             if i > 0:  # At this moment, the first column is already sorted
@@ -161,8 +158,6 @@ cdef class TrilaterationIndex:
                 self.r_indexes[i,] = self.r_indexes[i,sortorder]
                 self.r_distances[i,] = self.r_distances[i, sortorder]
 
-        print(self.r_indexes)
-        print(self.r_distances)
         return 0
 
     def query(self, X, k=5,
@@ -177,10 +172,12 @@ cdef class TrilaterationIndex:
         elif X.shape[0] == 1:
             results=self._query_one(X, k, return_distance, sort_results)
         else:
-            results = [self._query_one(x, k=k,
-                              return_distance=return_distance,
-                              sort_results=sort_results) \
-                       for x in X]
+            raise NotImplementedError("Not yet")
+            # [print(x) for x in X]
+            # results = [self._query_one(np.asarray([x]), k=k,
+            #                   return_distance=return_distance,
+            #                   sort_results=sort_results) \
+            #            for x in X]
 
         return results
 
@@ -205,7 +202,6 @@ cdef class TrilaterationIndex:
         # Can probably improve this - using a heap that allows more than 1 value
         # but we're always only using one here (X.shape[0] must be 1 from above)
         cdef NeighborsHeap heap = NeighborsHeap(X.shape[0], k)
-        # print(f"initialized heap: {heap.get_arrays(sort=False)}")
 
         # Establish the distances from the query point to the reference points
         cdef np.ndarray q_dists
@@ -220,27 +216,17 @@ cdef class TrilaterationIndex:
 
         best_idx = _find_nearest_sorted_2D(self.distances, q_dists[0,0])
         # best_dist = self.dist_metric.dist(self.data[best_idx,:], &Xarr, X.shape[1])
-        # print(self.data_arr[best_idx,:].shape)
         best_dist = cdist([self.data_arr[self.idx_array_arr[best_idx],:]], X)
         # heap.push(0, best_dist, best_idx)
-        # print(f"max heap: {heap.get_max(0)}")
-        # print(f"largest: {heap.largest(0)}")
-
-        # print(f"first best guess: {best_idx} data[{self.idx_array_arr[best_idx]}]")
-        # print(best_idx)
-        # print(np.asarray(self.data[best_idx,:]))
 
         # Populate the heap using 2k elements; k on each side of our first guess:
         low_idx = max(best_idx - k, 0)
         high_idx = min(best_idx + k, self.distances.shape[0])
-        # print(f"low_idx_possible: {low_idx_possible}; high_idx_possible: {high_idx_possible} with low_idx: {low_idx}, high_idx: {high_idx}")
         for i in range(low_idx, high_idx + 1):
             if i < self.distances.shape[0]:
                 test_dist = cdist([self.data_arr[self.idx_array_arr[i],:]], X)
                 heap.push(0, test_dist, self.idx_array_arr[i])
 
-        # print("starting best distance heap")
-        # print(f"{heap.get_arrays(sort=False)}")
 
         # Establish bounds between which to search
         if heap.largest(0) != np.inf:
@@ -256,16 +242,11 @@ cdef class TrilaterationIndex:
 
         while True:
             if low_idx <= low_idx_possible and high_idx >= high_idx_possible:
-                # print(f"breaking because {low_idx} <= {low_idx_possible} and {high_idx} >= {high_idx_possible}")
                 break
 
-            # print(f"heap.largest(0) = {heap.largest(0)}")
-
             # Determine whether the next high or low point is a better test:
             lowdelta = fabs(self.distances[low_idx, 0] - q_dists[0, 0])
             highdelta = fabs(self.distances[high_idx, 0] - q_dists[0, 0])
-            # print(f"comparing: {low_idx}, {high_idx}")
-            # print(f"lowdelta: {lowdelta}, highdelta: {highdelta}")
             if lowdelta <= highdelta and low_idx >= low_idx_possible:
                 test_idx = low_idx
                 low_idx = low_idx - 1
@@ -314,23 +295,55 @@ cdef class TrilaterationIndex:
         """
         cdef np.ndarray q_dists
         cdef ITYPE_t low_idx, high_idx
+
         q_dists = self.dist_metric.pairwise(X, self.ref_points_arr)
-        print(f"q_dists: {q_dists}")
         
         ground_idx = _find_nearest_sorted_2D(self.distances, q_dists[0, 0])
         # ground_idx = np.searchsorted(self.distances, q_dists[0, 0], side="left")
         low_idx = ground_idx - (k//2)
         high_idx = ground_idx + ceil(k/2)
 
-        print(f"get_start_dist is {self.distances[high_idx, 0] - self.distances[low_idx, 0]}")
         return(self.distances[high_idx, 0] - self.distances[low_idx, 0])
 
     def query_expand(self, X, k, return_distance=True, sort_results=True):
-        return np.asarray(self._query_expand(X, k, return_distance, sort_results))
+        """
+            X can be what...?
+            A singlie list of one coordinate: [50, 25, 100]
+        """
+        cdef np.ndarray idx_results
+        # cdef DTYPE_t[::1] dist_results
+        cdef np.ndarray dist_results
+
+
+        if isinstance(X, list):
+            X = np.asarray(X)
+
+        cdef NeighborsHeap heap = NeighborsHeap(X.shape[0], k)
+
+        if X.shape[0] == 1:
+            idx_results = np.asarray(self._query_expand(X, k))
+
+        else:
+            raise NotImplementedError("You can't do that yet")
+            # [print(x) for x in X]
+            # all_results = [self._query_expand(x, k) for x in X]
+            # print(f"all_results: {all_results}")
+
+        dist_results = self.dist_metric.pairwise(self.data_arr[idx_results], X)
+        # best = np.argsort(dist_results)
+        # dist_results = dist_results[best]
+        # idx_results = idx_results[best]
+
+        for idx, dist in zip(idx_results, dist_results):
+            heap.push(0, dist, idx)
+
+        if return_distance:
+            return heap.get_arrays()
 
-    cdef _query_expand(self, X, k,
-              return_distance,
-              sort_results):
+        return idx_results
+
+
+    cdef _query_expand(self, X, k):
         """
         return k-nn by the expanding method...
         select a starting radius, iterate over query_radius
@@ -338,20 +351,27 @@ cdef class TrilaterationIndex:
         arrive at a good result
         """
 
+        if X.shape[0] != 1:
+            raise ValueError("_query takes only a single X point"
+                             "use query for multiple records")
+
+        if X.shape[X.ndim - 1] != self.data.shape[1]:
+            raise ValueError("query data dimension must "
+                             "match training data dimension")
+
         cdef DTYPE_t radius, too_high, too_low, new_radius, STD_SCALE, MAX_FUDGE
         cdef ITYPE_t approx_count, i, MAX_ITER
         cdef ITYPE_t[::1] approx_array
 
 
         MAX_ITER = 20
-        MAX_FUDGE = 1.0  # MAX_ITER should handle things...
+        MAX_FUDGE = 5.0
         STD_SCALE = 2.0
 
         radius = self._get_start_dist(X, k)
 
         too_low = 0
         too_high = np.inf
-        print(f"testing query_radius_approx with: {X}, {radius}")
         approx_array = self._query_radius_approx(X, radius)
         approx_count = approx_array.shape[0]
 
@@ -359,7 +379,6 @@ cdef class TrilaterationIndex:
             if approx_count == k:
                 break
             elif approx_count < k:
-                print(f"{approx_count} is too few at radius {radius}")
                 too_low = radius
                 new_radius = radius * STD_SCALE
                 if new_radius > too_high:
@@ -369,7 +388,6 @@ cdef class TrilaterationIndex:
                 approx_count = approx_array.shape[0]
                 continue
             elif approx_count > k * MAX_FUDGE:
-                print(f"{approx_count} is too many at radius {radius}")
                 radius = (radius + too_low) / STD_SCALE
                 approx_array = self._query_radius_approx(X, radius)
                 approx_count = approx_array.shape[0]
@@ -394,20 +412,32 @@ cdef class TrilaterationIndex:
             raise ValueError("return_distance must be True "
                              "if sort_results is True")
 
-        cdef ITYPE_t[::1] idx_within_r
+        # cdef ITYPE_t[::1] idx_within_r = np.asarray([1])
+        cdef np.ndarray idx_within_r = np.asarray([1])
+        cdef np.ndarray dists_within_r
 
         if isinstance(X, list):
-            idx_witin_r = self._query_radius_approx([X], r)
-        elif X.shape[0] == 1:
-            idx_witin_r = self._query_radius_approx(X, r)
+            X = np.asarray(X)
+
+        if X.shape[0] == 1:
+            idx_within_r = self._query_radius_approx(X, r)
         else:
-            idx_witin_r = [self._query_radius_approx(x, r) for x in X]
+            idx_within_r = [self._query_radius_approx(x, r) for x in X]
+
+        dists_within_r = self.dist_metric.pairwise(self.data_arr[idx_within_r], X).reshape(idx_within_r.shape[0])
+        valid_idx = [x <= r for x in dists_within_r]
+        dists_within_r = dists_within_r[valid_idx]
+        idx_within_r = idx_within_r[valid_idx]
+
+        if sort_results:
+            sort_order = np.argsort(dists_within_r)
+            dists_within_r = dists_within_r[sort_order]
+            idx_within_r = idx_within_r[sort_order]
 
         if return_distance:
-            dists_within_r = self.dist_metric.pairwise(self.data_arr[idx_within_r], X)
-            return idx_witin_r, dists_within_r
+            return idx_within_r, dists_within_r
 
-        return idx_witin_r
+        return idx_within_r
 
 
     def query_radius_approx(self, X, r):
@@ -426,12 +456,16 @@ cdef class TrilaterationIndex:
 
         # cdef np.ndarray points_in_range = np.arange(self.r_indexes_mv.shape[0]).reshape(self.r_indexes_mv.shape[0])
         cdef np.ndarray q_dists
+        # cdef ITYPE_t[::1] common_idx
+        cdef np.ndarray common_idx
 
-        points_in_range = []
+        if isinstance(X, list) or X.ndim == 1:
+            X = np.asarray([X])
 
+        points_in_range = []
         q_dists = self.dist_metric.pairwise(X, self.ref_points_arr)
-        print(f"q_dists: {q_dists[0]} - {q_dists[0, 1]}")
-        print(f"r.indexes.shape: {self.r_indexes_mv.shape}")
+        # print(f"q_dists: {q_dists[0]} - {q_dists[0, 1]}")
+        # print(f"r.indexes.shape: {self.r_indexes_mv.shape}")
 
         for i in range(self.r_indexes_mv.shape[0]):
             # low_idx = _find_nearest_sorted_1D(self.r_distances_mv[i, :], q_dists[0, i] - r)
@@ -439,12 +473,11 @@ cdef class TrilaterationIndex:
             # high_idx = _find_nearest_sorted_1D(self.r_distances_mv[i, :], q_dists[0, i] + r)
             high_idx = np.searchsorted(self.r_distances_mv[i, :], q_dists[0, i] + r, side="right")
 
-            print(f"low_idx: {low_idx}, high_idx: {high_idx}")
-            print(f"valid IDS: {self.r_indexes[i, low_idx:high_idx]}")
+            # print(f"low_idx: {low_idx}, high_idx: {high_idx}")
+            # print(f"valid IDS: {self.r_indexes[i, low_idx:high_idx]}")
             points_in_range.append(self.r_indexes[i, low_idx:high_idx])
-            print(i)
         
         common_idx = reduce(np.intersect1d, points_in_range)
-        print(f"commonIDs: {common_idx}")
+        # print(f"commonIDs: {common_idx}")
         return common_idx
 

From b76156187aaa45b1e4553894c696d4a15702dea8 Mon Sep 17 00:00:00 2001
From: Chip Lynch <chip@chiplynch.com>
Date: Sat, 20 Feb 2021 22:40:00 -0800
Subject: [PATCH 11/19] query_radius_t3 is relatively performant now

---
 perftest.py                          |  84 ++++++++++++
 sklearn/neighbors/_trilateration.pyx | 194 ++++++++++++++++++++++++---
 2 files changed, 263 insertions(+), 15 deletions(-)
 create mode 100644 perftest.py

diff --git a/perftest.py b/perftest.py
new file mode 100644
index 0000000000000..1775ab08b1718
--- /dev/null
+++ b/perftest.py
@@ -0,0 +1,84 @@
+import csv
+import sklearn
+import numpy as np
+import timeit
+
+from scipy.spatial.distance import cdist
+
+querypoint = np.asarray([[38.25, -85.50]])
+
+samples = []
+refpoints = []
+
+with open('/home/chipmonkey/repos/TrilaterationIndex/data/lat_long_synthetic.csv') as csvfile:
+    reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
+    for row in reader:
+        # print(row)
+        samples.append([row['Latitude'], row['Longitude']])
+
+with open('/home/chipmonkey/repos/TrilaterationIndex/data/ref_points.csv') as csvfile:
+    reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
+    for row in reader:
+        print(row)
+        refpoints.append([row['Latitude'], row['Longitude']])
+
+from sklearn.neighbors import NearestNeighbors, KDTree
+from sklearn.neighbors import TrilaterationIndex
+
+brute = NearestNeighbors(n_neighbors=1000, radius=0.07, algorithm='brute')
+brute.fit(samples)
+BF = brute.radius_neighbors(querypoint, 0.1)
+print(f"brute force results: {BF[1][0]} ({len(BF[1][0])})")
+
+
+tree = KDTree(samples)
+# t = timeit.timeit(lambda: tree.query_radius(querypoint, r=0.07), number=500)
+# print(f"KDTree 500x single point time: {t}")
+tq = tree.query_radius(querypoint, r=0.1)
+print(f"tree query results: {tq} ({len(tq[0])})")
+
+
+trilat = TrilaterationIndex(samples)
+# t = timeit.timeit(lambda: trilat.query_radius(querypoint, r=0.07), number=500)
+# print(f"Trilat 500x single point time: {t}")
+tr = trilat.query_radius(querypoint, r=0.1)
+print(f"trilat query_radius: {tr} ({len(tr)})")
+
+
+t3 = trilat.query_radius_t3(querypoint, r=0.1)
+print(f"trilat query_radius_t3: {t3} ({len(t3)})")
+
+print(f"length match 1: {len(np.intersect1d(tq[0], tr))}")
+print(f"length match 2: {len(np.intersect1d(tq[0], t3))}")
+
+print(f"setdiff: {np.setdiff1d(tq[0], t3)}")
+if 18 in tq[0]:
+    print("18 in tq")
+if 18 in t3:
+    print("18 in t3")
+
+
+import pstats, cProfile
+import pyximport
+pyximport.install
+
+import sklearn
+# print(type(querypoint))
+# cProfile.runctx('trilat.query_radius(querypoint, r=0.07)', globals(), locals(), "Profile.prof")
+
+cProfile.runctx('timeit.timeit(lambda: brute.radius_neighbors(querypoint, 0.07), number=500)', globals(), locals(), "BFProfile.prof")
+cProfile.runctx('timeit.timeit(lambda: tree.query_radius(querypoint, r=0.07), number=500)', globals(), locals(), "KDProfile.prof")
+cProfile.runctx('timeit.timeit(lambda: trilat.query_radius(querypoint, r=0.07), number=500)', globals(), locals(), "TRProfile.prof")
+cProfile.runctx('timeit.timeit(lambda: trilat.query_radius_t3(querypoint, r=0.07), number=500)', globals(), locals(), "T3Profile.prof")
+
+s = pstats.Stats("BFProfile.prof")
+s.strip_dirs().sort_stats("cumtime").print_stats()
+
+s = pstats.Stats("KDProfile.prof")
+s.strip_dirs().sort_stats("cumtime").print_stats()
+
+s = pstats.Stats("TRProfile.prof")
+s.strip_dirs().sort_stats("cumtime").print_stats()
+
+s = pstats.Stats("T3Profile.prof")
+s.strip_dirs().sort_stats("cumtime").print_stats()
\ No newline at end of file
diff --git a/sklearn/neighbors/_trilateration.pyx b/sklearn/neighbors/_trilateration.pyx
index 17aa3ddae60a8..8880e12692cf2 100644
--- a/sklearn/neighbors/_trilateration.pyx
+++ b/sklearn/neighbors/_trilateration.pyx
@@ -2,6 +2,7 @@
 #cython: boundscheck=False
 #cython: wraparound=False
 #cython: cdivision=True
+#cython: profile=True
 
 # Trilateration (or n-fold lateration for arbitrary dimensions)
 # =====================
@@ -22,6 +23,7 @@
 cimport numpy as np
 import numpy as np
 
+from collections import Counter
 from functools import reduce
 from libc.math cimport fabs, ceil
 
@@ -66,8 +68,11 @@ cdef class TrilaterationIndex:
     cdef public ITYPE_t[::1] idx_array
     cdef readonly DTYPE_t[:, ::1] distances
 
+    cdef int ndims
+
     cdef DistanceMetric dist_metric
 
+
     def __cinit__(self):
         self.data_arr = np.empty((1, 1), dtype=DTYPE, order='C')
 
@@ -138,8 +143,8 @@ cdef class TrilaterationIndex:
 
         cdef DTYPE_t MAX_REF = 9999.0
 
-        ndims = self.data_arr.shape[1]
-        refs = [[0]*i+[MAX_REF]*(ndims-i) for i in range(ndims)]
+        self.ndims = self.data_arr.shape[1]
+        refs = [[0]*i+[MAX_REF]*(self.ndims-i) for i in range(self.ndims)]
         return np.asarray(refs)
 
 
@@ -168,9 +173,9 @@ cdef class TrilaterationIndex:
         """
 
         if isinstance(X, list):
-            results = self._query_one(np.asarray(X), k, return_distance, sort_results)
+            results = self._query_one(np.asarray(X), k, return_distance)
         elif X.shape[0] == 1:
-            results=self._query_one(X, k, return_distance, sort_results)
+            results=self._query_one(X, k, return_distance)
         else:
             raise NotImplementedError("Not yet")
             # [print(x) for x in X]
@@ -182,8 +187,7 @@ cdef class TrilaterationIndex:
         return results
 
     cdef _query_one(self, X, k=5,
-              return_distance=True,
-              sort_results=True):
+              return_distance=True):
         """
         query the index for k nearest neighbors
         """
@@ -372,7 +376,9 @@ cdef class TrilaterationIndex:
 
         too_low = 0
         too_high = np.inf
-        approx_array = self._query_radius_approx(X, radius)
+        # approx_array = self._query_radius_at_least_k(X, radius, k)
+        approx_array = self._query_radius_r0_only(X, radius)
+        
         approx_count = approx_array.shape[0]
 
         for i in range(MAX_ITER):
@@ -384,12 +390,12 @@ cdef class TrilaterationIndex:
                 if new_radius > too_high:
                     new_radius = (too_high + radius) / STD_SCALE
                 radius = new_radius
-                approx_array = self._query_radius_approx(X, radius)
+                approx_array = self._query_radius_at_least_k(X, radius, k)
                 approx_count = approx_array.shape[0]
                 continue
             elif approx_count > k * MAX_FUDGE:
                 radius = (radius + too_low) / STD_SCALE
-                approx_array = self._query_radius_approx(X, radius)
+                approx_array = self._query_radius_at_least_k(X, radius, k)
                 approx_count = approx_array.shape[0]
             else:
                 continue
@@ -449,15 +455,17 @@ cdef class TrilaterationIndex:
         return result
 
     cdef _query_radius_approx(self, X, r):
+        """
 
-        # The main self.idx_array and self.distances sort ONLY by the first
-        # reference distance.  Here we sort by all three.
-        # Tune this some day...
+        """
 
         # cdef np.ndarray points_in_range = np.arange(self.r_indexes_mv.shape[0]).reshape(self.r_indexes_mv.shape[0])
         cdef np.ndarray q_dists
         # cdef ITYPE_t[::1] common_idx
         cdef np.ndarray common_idx
+        cdef np.ndarray new_points
+
+        point_counts = Counter()
 
         if isinstance(X, list) or X.ndim == 1:
             X = np.asarray([X])
@@ -467,7 +475,7 @@ cdef class TrilaterationIndex:
         # print(f"q_dists: {q_dists[0]} - {q_dists[0, 1]}")
         # print(f"r.indexes.shape: {self.r_indexes_mv.shape}")
 
-        for i in range(self.r_indexes_mv.shape[0]):
+        for i in range(self.ndims):
             # low_idx = _find_nearest_sorted_1D(self.r_distances_mv[i, :], q_dists[0, i] - r)
             low_idx = np.searchsorted(self.r_distances_mv[i, :], q_dists[0, i] - r, side="left")
             # high_idx = _find_nearest_sorted_1D(self.r_distances_mv[i, :], q_dists[0, i] + r)
@@ -475,9 +483,165 @@ cdef class TrilaterationIndex:
 
             # print(f"low_idx: {low_idx}, high_idx: {high_idx}")
             # print(f"valid IDS: {self.r_indexes[i, low_idx:high_idx]}")
-            points_in_range.append(self.r_indexes[i, low_idx:high_idx])
+            new_points = self.r_indexes[i, low_idx:high_idx]
+            # print(f"new_points: {new_points} len: {len(new_points)}")
+            # point_counts.update(new_points)  # Failed performance improvement test; leaving for posterity
+            points_in_range.append(new_points)
         
-        common_idx = reduce(np.intersect1d, points_in_range)
+        # THIS IS THE PERFORMANCE PROBLEM:
+        common_idx = reduce(np.intersect1d, points_in_range)  # This works but seems slow (fastest so far tho)
+        # common_idx = np.asarray([k for (k, v) in point_counts.items() if v == self.r_indexes_mv.shape[0]])
+
         # print(f"commonIDs: {common_idx}")
         return common_idx
 
+    cdef _query_radius_at_least_k(self, X, r, k):
+        """ for use with expanding estimates...
+        break early if we've eliminated too many points
+        """
+
+        cdef np.ndarray q_dists
+        cdef np.ndarray common_idx
+
+        if isinstance(X, list) or X.ndim == 1:
+            X = np.asarray([X])
+
+        points_in_range = []
+        new_points = []
+
+        q_dists = self.dist_metric.pairwise(X, self.ref_points_arr)
+
+        for i in range(self.r_indexes_mv.shape[0]):
+            low_idx = np.searchsorted(self.r_distances_mv[i, :], q_dists[0, i] - r, side="left")
+            high_idx = np.searchsorted(self.r_distances_mv[i, :], q_dists[0, i] + r, side="right")
+
+            new_points = self.r_indexes[i, low_idx:high_idx]
+
+            if points_in_range == []:
+                points_in_range = new_points
+            else:
+                # THIS IS THE PERFORMANCE PROBLEM:
+                points_in_range = np.intersect1d(points_in_range, new_points)  # This is the line that works, but slowly
+            # At Least k voodoo:
+            if len(points_in_range) < k:
+                break
+        
+        common_idx = np.asarray(points_in_range)
+        return common_idx
+
+
+    def query_radius_t3(self, X, r):
+        """
+        query the index for neighbors within a radius r
+        return approximate results only (by relying on index distances)
+        """
+        # result = np.asarray(self._query_radius_r0_only(X, r))
+        result = self._query_radius_r0_only(X, r)
+
+        return result
+
+    cdef _query_radius_r0_only(self, X, r):
+        """
+        This approach uses only the original sort order from Ref Point 0
+        and then brute forces whatever that index indicates MAY be in range
+
+        NOT FINISHED
+        """
+
+        cdef DTYPE_t[:, ::1] q_dists # , dist_results
+        cdef np.ndarray[DTYPE_t, ndim=1] dist_results
+        # cdef DTYPE_t[::1] dist_results
+        cdef ITYPE_t[::1] approx_idxs, results
+        # cdef np.ndarray[DTYPE_t, ndim=2] q_dists, dist_results
+        # cdef np.ndarray[ITYPE_t] approx_idxs #, points_in_range
+        cdef ITYPE_t i, low_idx, high_idx, count
+
+        if isinstance(X, list) or X.ndim == 1:
+            X = np.asarray([X])
+
+        # points_in_range = []
+
+        q_dists = self.dist_metric.pairwise(X, self.ref_points_arr)
+        # ground_idx = _find_nearest_sorted_2D(self.distances, q_dists[0, 0])
+
+        # Which of the following two pairs are reliably faster?  Probably neither:
+        # low_idx = np.searchsorted(self.r_distances_mv[0,:], q_dists[0, 0] - r, side="left")
+        # high_idx = np.searchsorted(self.r_distances_mv[0,:], q_dists[0, 0] + r, side="right")
+
+        low_idx = np.searchsorted(self.distances[:,0], q_dists[0, 0] - r, side="left")
+        high_idx = np.searchsorted(self.distances[:,0], q_dists[0, 0] + r, side="right")
+        
+        # print(f"self.distances.shape: {self.distances.shape}, low_idx: {low_idx}, high_idx: {high_idx}")
+        # print(f"self.distances[low_idx, :]: {np.asarray(self.distances[low_idx,:])}")
+        # print(f"self.distances[low_idx+1, :]: {np.asarray(self.distances[low_idx+1,:])}")
+        # print(f"self.distances[low_idx+2, :]: {np.asarray(self.distances[low_idx+2,:])}")
+
+        # print(f"q_dists[0, :]: {q_dists[0, :]}")
+        # print(f"self.distances - qd: {abs(np.asarray(self.distances[low_idx,:] - q_dists[0, :]))} vs r: {r}")
+
+        # The next two lines work but are 10 times slower than just brute alone (as-is)
+        # approx_idxs = np.asarray([i for i in range(low_idx, high_idx+1) \
+        #               if np.all(abs(np.asarray(self.distances[i,:]) - q_dists[0, :]) <= r)])
+        # approx_idxs = [self.idx_array[i] for i in approx_idxs]
+        approx_idxs = self.idx_array[low_idx:high_idx+1]
+        # print(f"approx_idxs (len {len(approx_idxs)}): {np.array(approx_idxs)}")
+
+        dist_results = self.dist_metric.pairwise(self.data_arr[approx_idxs], X).reshape(approx_idxs.shape[0])
+        # print(f"len(dist_results): {len(dist_results)} or {dist_results.shape[0]} low_idx:high_idx {low_idx}:{high_idx}")
+        dist_results = dist_results.reshape(dist_results.shape[0])
+        # print(f"dist_results: {np.asarray(dist_results)}")
+
+        # cdef ITYPE_t result_count
+        # result_count = np.count_nonzero(dist_results <= r)
+        # print(f"dist_results <= r: {dist_results <= r}")
+        # print(f"result_count: {result_count}")
+
+        # result_arr = np.zeros(result_count, dtype=ITYPE)
+        result_arr = np.zeros(len(dist_results), dtype=ITYPE)
+        # results = get_memview_ITYPE_1D(result_arr)  # If you replace result_arr with results from here on you get nonsense.  I should understand, but do not.
+
+        count = 0
+        for i in range(len(dist_results)):
+            # print(f"{i} (count: {count}) testing point {approx_idxs[i]} with dist: {dist_results[i]}")
+            if dist_results[i] <= r:
+                # if approx_idxs[i] == 18:
+                #     print(f"adding {approx_idxs[i]}")
+                result_arr[count] = approx_idxs[i]
+                count += 1
+
+
+        # NOW WHAT?
+        # results_arr = approx_idxs[dist_results <= r]
+
+        # Why is this slow: ?
+        # results = approx_idxs[[range(8000)]]
+
+        #  This alone is slow... why?
+        # cdef np.array(dtype=bool) things  # Not sure how to declare this
+        # cdef ITYPE_t[::1] things
+        # things = [x <= r for x in dist_results]
+
+        # for i in range(len(dist_results)):
+        #     if things[i]:
+        #         results.append(approx_idxs[i])
+
+        # This is slow but works:
+        # results = approx_idxs[[x[0] <= r for x in dist_results]]
+
+        # This is even slower but works:
+        # results = np.asarray([approx_idxs[i] for i in range(len(approx_idxs)) if dist_results[i] <= r])
+
+        # print(f"results: {results}")
+        # return results
+        # return np.sort(np.asarray(results[0:count]))
+        return  np.asarray(result_arr[0:count])
+
+
+    cdef _query_radius_best_r_only(self, X, r):
+        """
+        This approach counts the candidates using each reference point by themselves
+        then brute forces the indexees from the best possible result
+
+        NOT FINISHED
+        """
+        pass
\ No newline at end of file

From a2cb2f3cf0eb1b9f7952daa1c4f64cd105de8943 Mon Sep 17 00:00:00 2001
From: Chip Lynch <chip@chiplynch.com>
Date: Tue, 23 Feb 2021 10:00:50 -0800
Subject: [PATCH 12/19] WIP for _query_expand_2.  Slight perf improvement for
 query_radius

---
 perftest.py                          |  84 -------------------
 sklearn/neighbors/_trilateration.pyx | 121 ++++++++++++++++++++++-----
 2 files changed, 99 insertions(+), 106 deletions(-)
 delete mode 100644 perftest.py

diff --git a/perftest.py b/perftest.py
deleted file mode 100644
index 1775ab08b1718..0000000000000
--- a/perftest.py
+++ /dev/null
@@ -1,84 +0,0 @@
-import csv
-import sklearn
-import numpy as np
-import timeit
-
-from scipy.spatial.distance import cdist
-
-querypoint = np.asarray([[38.25, -85.50]])
-
-samples = []
-refpoints = []
-
-with open('/home/chipmonkey/repos/TrilaterationIndex/data/lat_long_synthetic.csv') as csvfile:
-    reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
-    for row in reader:
-        # print(row)
-        samples.append([row['Latitude'], row['Longitude']])
-
-with open('/home/chipmonkey/repos/TrilaterationIndex/data/ref_points.csv') as csvfile:
-    reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
-    for row in reader:
-        print(row)
-        refpoints.append([row['Latitude'], row['Longitude']])
-
-from sklearn.neighbors import NearestNeighbors, KDTree
-from sklearn.neighbors import TrilaterationIndex
-
-brute = NearestNeighbors(n_neighbors=1000, radius=0.07, algorithm='brute')
-brute.fit(samples)
-BF = brute.radius_neighbors(querypoint, 0.1)
-print(f"brute force results: {BF[1][0]} ({len(BF[1][0])})")
-
-
-tree = KDTree(samples)
-# t = timeit.timeit(lambda: tree.query_radius(querypoint, r=0.07), number=500)
-# print(f"KDTree 500x single point time: {t}")
-tq = tree.query_radius(querypoint, r=0.1)
-print(f"tree query results: {tq} ({len(tq[0])})")
-
-
-trilat = TrilaterationIndex(samples)
-# t = timeit.timeit(lambda: trilat.query_radius(querypoint, r=0.07), number=500)
-# print(f"Trilat 500x single point time: {t}")
-tr = trilat.query_radius(querypoint, r=0.1)
-print(f"trilat query_radius: {tr} ({len(tr)})")
-
-
-t3 = trilat.query_radius_t3(querypoint, r=0.1)
-print(f"trilat query_radius_t3: {t3} ({len(t3)})")
-
-print(f"length match 1: {len(np.intersect1d(tq[0], tr))}")
-print(f"length match 2: {len(np.intersect1d(tq[0], t3))}")
-
-print(f"setdiff: {np.setdiff1d(tq[0], t3)}")
-if 18 in tq[0]:
-    print("18 in tq")
-if 18 in t3:
-    print("18 in t3")
-
-
-import pstats, cProfile
-import pyximport
-pyximport.install
-
-import sklearn
-# print(type(querypoint))
-# cProfile.runctx('trilat.query_radius(querypoint, r=0.07)', globals(), locals(), "Profile.prof")
-
-cProfile.runctx('timeit.timeit(lambda: brute.radius_neighbors(querypoint, 0.07), number=500)', globals(), locals(), "BFProfile.prof")
-cProfile.runctx('timeit.timeit(lambda: tree.query_radius(querypoint, r=0.07), number=500)', globals(), locals(), "KDProfile.prof")
-cProfile.runctx('timeit.timeit(lambda: trilat.query_radius(querypoint, r=0.07), number=500)', globals(), locals(), "TRProfile.prof")
-cProfile.runctx('timeit.timeit(lambda: trilat.query_radius_t3(querypoint, r=0.07), number=500)', globals(), locals(), "T3Profile.prof")
-
-s = pstats.Stats("BFProfile.prof")
-s.strip_dirs().sort_stats("cumtime").print_stats()
-
-s = pstats.Stats("KDProfile.prof")
-s.strip_dirs().sort_stats("cumtime").print_stats()
-
-s = pstats.Stats("TRProfile.prof")
-s.strip_dirs().sort_stats("cumtime").print_stats()
-
-s = pstats.Stats("T3Profile.prof")
-s.strip_dirs().sort_stats("cumtime").print_stats()
\ No newline at end of file
diff --git a/sklearn/neighbors/_trilateration.pyx b/sklearn/neighbors/_trilateration.pyx
index 8880e12692cf2..848af20a3f0c5 100644
--- a/sklearn/neighbors/_trilateration.pyx
+++ b/sklearn/neighbors/_trilateration.pyx
@@ -189,7 +189,7 @@ cdef class TrilaterationIndex:
     cdef _query_one(self, X, k=5,
               return_distance=True):
         """
-        query the index for k nearest neighbors
+        query the index for k nearest neighbors for a single point X
         """
         if X.shape[0] != 1:
             raise ValueError("_query takes only a single X point"
@@ -309,7 +309,8 @@ cdef class TrilaterationIndex:
 
         return(self.distances[high_idx, 0] - self.distances[low_idx, 0])
 
-    def query_expand(self, X, k, return_distance=True, sort_results=True):
+    def query_expand(self, X, k, return_distance=True, sort_results=True,
+                     mfudge=5.0, miter=20, sscale=2.0):
         """
             X can be what...?
             A singlie list of one coordinate: [50, 25, 100]
@@ -325,7 +326,7 @@ cdef class TrilaterationIndex:
         cdef NeighborsHeap heap = NeighborsHeap(X.shape[0], k)
 
         if X.shape[0] == 1:
-            idx_results = np.asarray(self._query_expand(X, k))
+            idx_results = np.asarray(self._query_expand(X, k, mfudge, miter, sscale))
 
         else:
             raise NotImplementedError("You can't do that yet")
@@ -333,21 +334,28 @@ cdef class TrilaterationIndex:
             # all_results = [self._query_expand(x, k) for x in X]
             # print(f"all_results: {all_results}")
 
-        dist_results = self.dist_metric.pairwise(self.data_arr[idx_results], X)
-        # best = np.argsort(dist_results)
-        # dist_results = dist_results[best]
-        # idx_results = idx_results[best]
-
-        for idx, dist in zip(idx_results, dist_results):
-            heap.push(0, dist, idx)
-
         if return_distance:
+            dist_results = self.dist_metric.pairwise(self.data_arr[idx_results], X)
+            # best = np.argsort(dist_results)
+            # dist_results = dist_results[best]
+            # idx_results = idx_results[best]
+            for idx, dist in zip(idx_results, dist_results):
+                heap.push(0, dist, idx)
+
             return heap.get_arrays()
 
         return idx_results
 
+    cdef ITYPE_t _count_in_range(self, DTYPE_t* dists, r, size):
+        cdef ITYPE_t count = 0, i
+
+        for i in range(size):
+            if dists[i] <= r:
+                count += 1
+
+        return count
 
-    cdef _query_expand(self, X, k):
+    cdef _query_expand(self, X, k, mfudge, miter, sscale):
         """
         return k-nn by the expanding method...
         select a starting radius, iterate over query_radius
@@ -363,14 +371,14 @@ cdef class TrilaterationIndex:
             raise ValueError("query data dimension must "
                              "match training data dimension")
 
-        cdef DTYPE_t radius, too_high, too_low, new_radius, STD_SCALE, MAX_FUDGE
+        cdef DTYPE_t radius, too_high, too_low, new_radius, STD_SCALE, MAX_FUDGE, ratio
         cdef ITYPE_t approx_count, i, MAX_ITER
         cdef ITYPE_t[::1] approx_array
 
 
-        MAX_ITER = 20
-        MAX_FUDGE = 5.0
-        STD_SCALE = 2.0
+        MAX_ITER = miter
+        MAX_FUDGE = mfudge
+        STD_SCALE = sscale
 
         radius = self._get_start_dist(X, k)
 
@@ -387,15 +395,18 @@ cdef class TrilaterationIndex:
             elif approx_count < k:
                 too_low = radius
                 new_radius = radius * STD_SCALE
+                # new_radius = radius * k / approx_count
                 if new_radius > too_high:
-                    new_radius = (too_high + radius) / STD_SCALE
+                    new_radius = (too_high + too_low) / 2
                 radius = new_radius
-                approx_array = self._query_radius_at_least_k(X, radius, k)
+                # approx_array = self._query_radius_at_least_k(X, radius, k)
+                approx_array = self._query_radius_r0_only(X, radius)
                 approx_count = approx_array.shape[0]
                 continue
             elif approx_count > k * MAX_FUDGE:
                 radius = (radius + too_low) / STD_SCALE
-                approx_array = self._query_radius_at_least_k(X, radius, k)
+                # approx_array = self._query_radius_at_least_k(X, radius, k)
+                approx_array = self._query_radius_r0_only(X, radius)
                 approx_count = approx_array.shape[0]
             else:
                 continue
@@ -403,6 +414,74 @@ cdef class TrilaterationIndex:
         return approx_array
 
 
+    cdef _query_expand_2(self, X, k, mfudge, miter, sscale):
+        """
+        return k-nn by the expanding method...
+        select a starting radius
+        In this case we expand the radius for the sorted self.distances
+        NOT necessarily just precise distances
+        """
+
+        if X.shape[0] != 1:
+            raise ValueError("_query takes only a single X point"
+                             "use query for multiple records")
+
+        if X.shape[X.ndim - 1] != self.data.shape[1]:
+            raise ValueError("query data dimension must "
+                             "match training data dimension")
+
+        cdef DTYPE_t STD_SCALE, MAX_FUDGE, ratio, radius, dist
+        cdef ITYPE_t approx_count, i, MAX_ITER, low_count, low_idx, high_idx, ground_idx, CHUNK_SIZE
+        cdef ITYPE_t new_high, new_low, idx
+        cdef ITYPE_t[::1] approx_array
+        cdef NeighborsHeap heap = NeighborsHeap(X.shape[0], k)
+
+        cdef np.ndarray q_dists
+        cdef DTYPE_t[::1] new_dists, approx_dists, low_dists, high_dists
+
+        MAX_ITER = miter
+        MAX_FUDGE = mfudge
+        STD_SCALE = sscale
+        CHUNK_SIZE = max(k, 500)  # Tune?
+
+        q_dists = self.dist_metric.pairwise(X, self.ref_points_arr)
+        ground_idx = _find_nearest_sorted_2D(self.distances, q_dists[0, 0])
+        low_idx = min(ground_idx - k, 0)
+        high_idx = max(ground_idx + k, self.data_arr.shape[1])
+
+        approx_array = self.idx_array[low_idx:high_idx+1]
+
+        approx_dists = self.dist_metric.pairwise(self.data_arr[approx_array], X).reshape(approx_array.shape[0])
+
+        for idx, dist in zip(approx_array, approx_dists):
+            if dist < heap.largest(0):
+                heap.push(0, dist, idx)
+
+        radius = heap.largest(0)
+        
+        approx_count = self._count_in_range(&approx_dists[0], radius, approx_dists.shape[0])
+
+        while approx_count < k:
+            new_low = min(low_idx - CHUNK_SIZE, 0)
+            new_high = max(high_idx + CHUNK_SIZE, self.data_arr.shape[1])
+            if new_low < low_idx:
+                low_dists = self.dist_metric.pairwise(self.idx_array[new_low:low_idx], X).reshape(low_idx - new_low)
+                low_count = self._count_in_range(&low_dists[0], radius, low_dists.shape[0])
+                approx_count = approx_count + low_count
+                low_idx = new_low
+            
+            if new_high > high_idx:
+                high_dists = self.dist_metric.pairwise(self.idx_array[high_idx:new_high], X).reshape(new_high - high_idx)
+                high_count = self._count_in_range(&high_dists[0], radius, high_dists.shape[0])
+                approx_count = approx_count + high_count
+                high_idx = new_high
+
+            approx_array = self.idx_array[low_idx:high_idx+1]
+            radius = heap.largest(0)
+  
+        return approx_array
+
+
     def query_radius(self, X, r,
                      int return_distance=False,
                      int count_only=False,
@@ -544,8 +623,6 @@ cdef class TrilaterationIndex:
         """
         This approach uses only the original sort order from Ref Point 0
         and then brute forces whatever that index indicates MAY be in range
-
-        NOT FINISHED
         """
 
         cdef DTYPE_t[:, ::1] q_dists # , dist_results
@@ -588,7 +665,7 @@ cdef class TrilaterationIndex:
 
         dist_results = self.dist_metric.pairwise(self.data_arr[approx_idxs], X).reshape(approx_idxs.shape[0])
         # print(f"len(dist_results): {len(dist_results)} or {dist_results.shape[0]} low_idx:high_idx {low_idx}:{high_idx}")
-        dist_results = dist_results.reshape(dist_results.shape[0])
+        # dist_results = dist_results.reshape(dist_results.shape[0])
         # print(f"dist_results: {np.asarray(dist_results)}")
 
         # cdef ITYPE_t result_count

From 52a5e6bd42d2b330222a51c2e7693d138858c9f2 Mon Sep 17 00:00:00 2001
From: Chip Lynch <chip@chiplynch.com>
Date: Tue, 23 Feb 2021 13:44:04 -0800
Subject: [PATCH 13/19] Query expand 2 performed well in ann!  Yay!

---
 sklearn/neighbors/_trilateration.pyx | 95 ++++++++++++++++++++--------
 1 file changed, 68 insertions(+), 27 deletions(-)

diff --git a/sklearn/neighbors/_trilateration.pyx b/sklearn/neighbors/_trilateration.pyx
index 848af20a3f0c5..bc1dc1aa53de3 100644
--- a/sklearn/neighbors/_trilateration.pyx
+++ b/sklearn/neighbors/_trilateration.pyx
@@ -414,10 +414,37 @@ cdef class TrilaterationIndex:
         return approx_array
 
 
+    def query_expand_2(self, X, k, return_distance=True, sort_results=True,
+                     mfudge=5.0, miter=20, sscale=2.0):
+        """
+            X can be what...?
+            A singlie list of one coordinate: [50, 25, 100]
+        """
+        cdef np.ndarray idx_results
+        # cdef DTYPE_t[::1] dist_results
+        cdef np.ndarray dist_results
+
+        # print(f"taa daa: {X}")
+
+        if isinstance(X, list):
+            X = np.asarray(X)
+
+        cdef NeighborsHeap heap = NeighborsHeap(X.shape[0], k)
+
+        if X.shape[0] == 1:
+            heap = self._query_expand_2(X, k, mfudge, miter, sscale)
+
+        else:
+            raise NotImplementedError("You can't do that yet")
+
+        if return_distance:
+            return heap.get_arrays()
+
+        return heap.get_arrays()  # Fix this
+
     cdef _query_expand_2(self, X, k, mfudge, miter, sscale):
         """
         return k-nn by the expanding method...
-        select a starting radius
         In this case we expand the radius for the sorted self.distances
         NOT necessarily just precise distances
         """
@@ -436,7 +463,7 @@ cdef class TrilaterationIndex:
         cdef ITYPE_t[::1] approx_array
         cdef NeighborsHeap heap = NeighborsHeap(X.shape[0], k)
 
-        cdef np.ndarray q_dists
+        cdef np.ndarray q_dists, temp_data
         cdef DTYPE_t[::1] new_dists, approx_dists, low_dists, high_dists
 
         MAX_ITER = miter
@@ -446,40 +473,54 @@ cdef class TrilaterationIndex:
 
         q_dists = self.dist_metric.pairwise(X, self.ref_points_arr)
         ground_idx = _find_nearest_sorted_2D(self.distances, q_dists[0, 0])
-        low_idx = min(ground_idx - k, 0)
-        high_idx = max(ground_idx + k, self.data_arr.shape[1])
+        low_idx = max(ground_idx - k, 0)
+        high_idx = min(ground_idx + k, self.data_arr.shape[0])
 
         approx_array = self.idx_array[low_idx:high_idx+1]
-
+        # print(f"k: {k}, ground_idx: {ground_idx}, low_idx: {low_idx}, high_idx: {high_idx}")
+        # print(f"getting dists for approx_array: {approx_array} with shape: {approx_array.shape}")
         approx_dists = self.dist_metric.pairwise(self.data_arr[approx_array], X).reshape(approx_array.shape[0])
 
         for idx, dist in zip(approx_array, approx_dists):
             if dist < heap.largest(0):
                 heap.push(0, dist, idx)
-
-        radius = heap.largest(0)
         
-        approx_count = self._count_in_range(&approx_dists[0], radius, approx_dists.shape[0])
-
-        while approx_count < k:
-            new_low = min(low_idx - CHUNK_SIZE, 0)
-            new_high = max(high_idx + CHUNK_SIZE, self.data_arr.shape[1])
-            if new_low < low_idx:
-                low_dists = self.dist_metric.pairwise(self.idx_array[new_low:low_idx], X).reshape(low_idx - new_low)
-                low_count = self._count_in_range(&low_dists[0], radius, low_dists.shape[0])
-                approx_count = approx_count + low_count
-                low_idx = new_low
-            
-            if new_high > high_idx:
-                high_dists = self.dist_metric.pairwise(self.idx_array[high_idx:new_high], X).reshape(new_high - high_idx)
-                high_count = self._count_in_range(&high_dists[0], radius, high_dists.shape[0])
-                approx_count = approx_count + high_count
-                high_idx = new_high
-
-            approx_array = self.idx_array[low_idx:high_idx+1]
-            radius = heap.largest(0)
+        lowdelta = fabs(self.distances[low_idx, 0] - q_dists[0, 0])
+        highdelta = fabs(self.distances[high_idx, 0] - q_dists[0, 0])
+
+        # While the "not closer than" numbers are still closer than the current solution...
+        count = 0
+        while (lowdelta < heap.largest(0) and low_idx > 0) \
+              or (highdelta < heap.largest(0) and high_idx < self.data_arr.shape[0]):
+            count += 1
+            # print(count, lowdelta, highdelta, heap.largest(0), low_idx, high_idx)
+            if lowdelta < heap.largest(0):
+                new_low = max(low_idx - CHUNK_SIZE, 0)
+                if new_low < low_idx:
+                    low_dists = self.dist_metric.pairwise(self.data_arr[self.idx_array[new_low:low_idx]], X)[0] #.reshape(low_idx - new_low)
+                    for idx in range(low_idx - new_low):
+                        if low_dists[idx] < heap.largest(0):
+                            heap.push(0, low_dists[idx], self.idx_array[idx + new_low])
+                    low_idx = new_low
+
+            if highdelta < heap.largest(0):
+                new_high = min(high_idx + CHUNK_SIZE, self.data_arr.shape[0])
+                if new_high > high_idx:
+                    # print(f"new_high: {new_high}, high_idx: {high_idx}, self.data_arr.shape[1]")
+                    high_dists = self.dist_metric.pairwise(self.data_arr[self.idx_array[high_idx:new_high]], X)[0] #.reshape(new_high - high_idx)
+                    # testing approaches for performance:
+                    for idx in range(new_high - high_idx):
+                        if high_dists[idx] < heap.largest(0):
+                            heap.push(0, high_dists[idx], self.idx_array[idx + high_idx])
+                    # for idx, dist in zip(self.idx_array[high_idx:new_high], high_dists):
+                    #     if dist < heap.largest(0):
+                    #         heap.push(0, dist, idx)
+                    high_idx = new_high
+
+            lowdelta = fabs(self.distances[low_idx, 0] - q_dists[0, 0])
+            highdelta = fabs(self.distances[high_idx, 0] - q_dists[0, 0])
   
-        return approx_array
+        return heap
 
 
     def query_radius(self, X, r,

From e4fc3e8e49ebe89f14596b61c2fe61b6f9446a0b Mon Sep 17 00:00:00 2001
From: Chip Lynch <chip@chiplynch.com>
Date: Thu, 11 Mar 2021 19:27:06 -0800
Subject: [PATCH 14/19] Working (very slow) basic geodesic distance from geopy

---
 perftest_geodesic.py                 | 114 ++++++++++++++++++++++++++
 perftest_query.py                    | 115 +++++++++++++++++++++++++++
 perftest_radius.py                   |  84 +++++++++++++++++++
 sklearn/metrics/pairwise.py          |  54 ++++++++++++-
 sklearn/neighbors/_ball_tree.pyx     |   3 +-
 sklearn/neighbors/_dist_metrics.pyx  |  31 +++++++-
 sklearn/neighbors/_kd_tree.pyx       |   3 +-
 sklearn/neighbors/_trilateration.pyx |   8 +-
 8 files changed, 404 insertions(+), 8 deletions(-)
 create mode 100644 perftest_geodesic.py
 create mode 100644 perftest_query.py
 create mode 100644 perftest_radius.py

diff --git a/perftest_geodesic.py b/perftest_geodesic.py
new file mode 100644
index 0000000000000..6ecc75d1f3ea8
--- /dev/null
+++ b/perftest_geodesic.py
@@ -0,0 +1,114 @@
+import csv
+import sklearn
+import numpy as np
+import timeit
+import time
+
+from scipy.spatial.distance import cdist
+from geopy.distance import geodesic
+
+
+import pstats, cProfile
+import pyximport
+pyximport.install
+
+import sklearn
+
+start = time.time()
+
+querypoint = np.asarray([[38.25, -85.50]])
+
+samples = []
+refpoints = []
+
+with open('/home/chipmonkey/repos/TrilaterationIndex/data/lat_long_synthetic.csv') as csvfile:
+    reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
+    for row in reader:
+        # print(row)
+        samples.append([row['Latitude'], row['Longitude']])
+
+with open('/home/chipmonkey/repos/TrilaterationIndex/data/ref_points.csv') as csvfile:
+    reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
+    for row in reader:
+        print(row)
+        refpoints.append([row['Latitude'], row['Longitude']])
+
+from sklearn.neighbors import NearestNeighbors, KDTree
+from sklearn.neighbors import TrilaterationIndex
+
+print(f"{time.time() - start} seconds since start")
+
+
+brute = NearestNeighbors(n_neighbors=1000, radius=0.07, algorithm='brute', metric='geodesic')
+brute.fit(samples)
+BF = brute.kneighbors(querypoint, 100)
+print(f"brute force results: {BF[1][0]} ({len(BF[1][0])}) - at time {time.time() - start} seconds")
+
+
+tree = KDTree(samples, metric='geodesic')
+# t = timeit.timeit(lambda: tree.query_radius(querypoint, r=0.07), number=500)
+# print(f"KDTree 500x single point time: {t}")
+tq = tree.query(querypoint, k=100)
+print(f"tree query results: {tq} ({len(tq[0])}) - at time {time.time() - start} seconds")
+
+
+trilat = TrilaterationIndex(samples, metric='geodesic')
+# t = timeit.timeit(lambda: trilat.query_radius(querypoint, r=0.07), number=500)
+# print(f"Trilat 500x single point time: {t}")
+tr = trilat.query(querypoint, k=100)
+print(f"trilat query_radius: {tr} ({len(tr)}) - at time {time.time() - start} seconds")
+
+
+t3 = trilat.query_expand(querypoint, k=100)
+print(f"trilat query_radius_t3: {t3} ({len(t3)}) - at time {time.time() - start} seconds")
+
+t4 = trilat.query_expand_2(querypoint, k=100)
+print(f"trilat query_expand_2: {t4} ({len(t4)}) - at time {time.time() - start} seconds")
+
+print(f"length match 1: {len(np.intersect1d(tq[1], tr[1]))}")
+print(f"length match 2: {len(np.intersect1d(tq[1], t3[1]))}")
+print(f"length match 4: {len(np.intersect1d(tq[1], t4[1]))}")
+
+# exit()
+
+print(f"setdiff: {np.setdiff1d(tq[0], t3)}")
+if 18 in tq[0]:
+    print("18 in tq")
+if 18 in t3[1]:
+    print("18 in t3")
+
+
+
+# print(type(querypoint))
+# cProfile.runctx('trilat.query_radius(querypoint, r=0.07)', globals(), locals(), "Profile.prof")
+
+# print(f"timing brute force:")
+# cProfile.runctx('timeit.timeit(lambda: brute.kneighbors(querypoint, 100), number=20)', globals(), locals(), "QBFProfile.prof")
+
+print("timing kd tree")
+cProfile.runctx('timeit.timeit(lambda: tree.query(querypoint, k=100), number=20)', globals(), locals(), "QKDProfile_geo.prof")
+
+
+print("timing trilat.query")
+cProfile.runctx('timeit.timeit(lambda: trilat.query(querypoint, k=100), number=20)', globals(), locals(), "QTRProfile_geo.prof")
+
+print("timing query_expand")
+cProfile.runctx('timeit.timeit(lambda: trilat.query_expand(querypoint, k=100, mfudge=5, miter=20, sscale=2), number=20)', globals(), locals(), "QT3Profile_geo.prof")
+
+print("timing query_expand_2")
+cProfile.runctx('timeit.timeit(lambda: trilat.query_expand_2(querypoint, k=100, mfudge=5, miter=20, sscale=2), number=20)', globals(), locals(), "QT4Profile_geo.prof")
+
+s = pstats.Stats("QBFProfile.prof")
+s.strip_dirs().sort_stats("cumtime").print_stats()
+
+s = pstats.Stats("QKDProfile.prof")
+s.strip_dirs().sort_stats("cumtime").print_stats()
+
+s = pstats.Stats("QTRProfile.prof")
+s.strip_dirs().sort_stats("cumtime").print_stats()
+
+s = pstats.Stats("QT3Profile.prof")
+s.strip_dirs().sort_stats("cumtime").print_stats()
+
+s = pstats.Stats("QT4Profile.prof")
+s.strip_dirs().sort_stats("cumtime").print_stats()
\ No newline at end of file
diff --git a/perftest_query.py b/perftest_query.py
new file mode 100644
index 0000000000000..ca4cefbeb0434
--- /dev/null
+++ b/perftest_query.py
@@ -0,0 +1,115 @@
+import csv
+import sklearn
+import numpy as np
+import timeit
+import time
+
+from scipy.spatial.distance import cdist
+
+import pstats, cProfile
+import pyximport
+pyximport.install
+
+import sklearn
+
+start = time.time()
+
+querypoint = np.asarray([[38.25, -85.50]])
+
+samples = []
+refpoints = []
+
+with open('/home/chipmonkey/repos/TrilaterationIndex/data/lat_long_synthetic.csv') as csvfile:
+    reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
+    for row in reader:
+        # print(row)
+        samples.append([row['Latitude'], row['Longitude']])
+
+with open('/home/chipmonkey/repos/TrilaterationIndex/data/ref_points.csv') as csvfile:
+    reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
+    for row in reader:
+        print(row)
+        refpoints.append([row['Latitude'], row['Longitude']])
+
+from sklearn.neighbors import NearestNeighbors, KDTree
+from sklearn.neighbors import TrilaterationIndex
+
+print(f"{time.time() - start} seconds since start")
+
+
+brute = NearestNeighbors(n_neighbors=1000, radius=0.07, algorithm='brute', metric='euclidean')
+brute.fit(samples)
+BF = brute.kneighbors(querypoint, 100)
+print(f"brute force results: {BF[1][0]} ({len(BF[1][0])}) - at time {time.time() - start} seconds")
+
+
+tree = KDTree(samples, metric='euclidean')
+# t = timeit.timeit(lambda: tree.query_radius(querypoint, r=0.07), number=500)
+# print(f"KDTree 500x single point time: {t}")
+tq = tree.query(querypoint, k=100)
+print(f"tree query results: {tq} ({len(tq[0])}) - at time {time.time() - start} seconds")
+
+
+trilat = TrilaterationIndex(samples, metric='euclidean')
+# t = timeit.timeit(lambda: trilat.query_radius(querypoint, r=0.07), number=500)
+# print(f"Trilat 500x single point time: {t}")
+tr = trilat.query(querypoint, k=100)
+print(f"trilat query_radius: {tr} ({len(tr)}) - at time {time.time() - start} seconds")
+
+
+t3 = trilat.query_expand(querypoint, k=100)
+print(f"trilat query_radius_t3: {t3} ({len(t3)}) - at time {time.time() - start} seconds")
+
+t4 = trilat.query_expand_2(querypoint, k=100)
+print(f"trilat query_expand_2: {t4} ({len(t4)}) - at time {time.time() - start} seconds")
+
+print(f"length match 1: {len(np.intersect1d(tq[1], tr[1]))}")
+print(f"length match 2: {len(np.intersect1d(tq[1], t3[1]))}")
+print(f"length match 4: {len(np.intersect1d(tq[1], t4[1]))}")
+
+# exit()
+
+print(f"setdiff: {np.setdiff1d(tq[0], t3)}")
+if 18 in tq[0]:
+    print("18 in tq")
+if 18 in t3[1]:
+    print("18 in t3")
+
+
+
+# print(type(querypoint))
+# cProfile.runctx('trilat.query_radius(querypoint, r=0.07)', globals(), locals(), "Profile.prof")
+
+# print(f"timing brute force:")
+# cProfile.runctx('timeit.timeit(lambda: brute.kneighbors(querypoint, 100), number=500)', globals(), locals(), "QBFProfile.prof")
+print("running one kd tree")
+tree.query(querypoint, k=100)
+
+print("timing kd tree")
+# cProfile.runctx('timeit.timeit(lambda: tree.query(querypoint, k=100), number=500)', globals(), locals(), "QKDProfile.prof")
+cProfile.runctx('timeit.timeit(lambda: tree.query(querypoint, k=100), number=20)', globals(), locals(), "QKDProfile.prof")
+
+
+# print("timing trilat.query")
+# cProfile.runctx('timeit.timeit(lambda: trilat.query(querypoint, k=100), number=500)', globals(), locals(), "QTRProfile.prof")
+
+print("timing query_expand")
+cProfile.runctx('timeit.timeit(lambda: trilat.query_expand(querypoint, k=100, mfudge=5, miter=20, sscale=2), number=500)', globals(), locals(), "QT3Profile.prof")
+
+print("timing query_expand_2")
+cProfile.runctx('timeit.timeit(lambda: trilat.query_expand_2(querypoint, k=100, mfudge=5, miter=20, sscale=2), number=500)', globals(), locals(), "QT4Profile.prof")
+
+s = pstats.Stats("QBFProfile.prof")
+s.strip_dirs().sort_stats("cumtime").print_stats()
+
+s = pstats.Stats("QKDProfile.prof")
+s.strip_dirs().sort_stats("cumtime").print_stats()
+
+s = pstats.Stats("QTRProfile.prof")
+s.strip_dirs().sort_stats("cumtime").print_stats()
+
+s = pstats.Stats("QT3Profile.prof")
+s.strip_dirs().sort_stats("cumtime").print_stats()
+
+s = pstats.Stats("QT4Profile.prof")
+s.strip_dirs().sort_stats("cumtime").print_stats()
\ No newline at end of file
diff --git a/perftest_radius.py b/perftest_radius.py
new file mode 100644
index 0000000000000..1775ab08b1718
--- /dev/null
+++ b/perftest_radius.py
@@ -0,0 +1,84 @@
+import csv
+import sklearn
+import numpy as np
+import timeit
+
+from scipy.spatial.distance import cdist
+
+querypoint = np.asarray([[38.25, -85.50]])
+
+samples = []
+refpoints = []
+
+with open('/home/chipmonkey/repos/TrilaterationIndex/data/lat_long_synthetic.csv') as csvfile:
+    reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
+    for row in reader:
+        # print(row)
+        samples.append([row['Latitude'], row['Longitude']])
+
+with open('/home/chipmonkey/repos/TrilaterationIndex/data/ref_points.csv') as csvfile:
+    reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
+    for row in reader:
+        print(row)
+        refpoints.append([row['Latitude'], row['Longitude']])
+
+from sklearn.neighbors import NearestNeighbors, KDTree
+from sklearn.neighbors import TrilaterationIndex
+
+brute = NearestNeighbors(n_neighbors=1000, radius=0.07, algorithm='brute')
+brute.fit(samples)
+BF = brute.radius_neighbors(querypoint, 0.1)
+print(f"brute force results: {BF[1][0]} ({len(BF[1][0])})")
+
+
+tree = KDTree(samples)
+# t = timeit.timeit(lambda: tree.query_radius(querypoint, r=0.07), number=500)
+# print(f"KDTree 500x single point time: {t}")
+tq = tree.query_radius(querypoint, r=0.1)
+print(f"tree query results: {tq} ({len(tq[0])})")
+
+
+trilat = TrilaterationIndex(samples)
+# t = timeit.timeit(lambda: trilat.query_radius(querypoint, r=0.07), number=500)
+# print(f"Trilat 500x single point time: {t}")
+tr = trilat.query_radius(querypoint, r=0.1)
+print(f"trilat query_radius: {tr} ({len(tr)})")
+
+
+t3 = trilat.query_radius_t3(querypoint, r=0.1)
+print(f"trilat query_radius_t3: {t3} ({len(t3)})")
+
+print(f"length match 1: {len(np.intersect1d(tq[0], tr))}")
+print(f"length match 2: {len(np.intersect1d(tq[0], t3))}")
+
+print(f"setdiff: {np.setdiff1d(tq[0], t3)}")
+if 18 in tq[0]:
+    print("18 in tq")
+if 18 in t3:
+    print("18 in t3")
+
+
+import pstats, cProfile
+import pyximport
+pyximport.install
+
+import sklearn
+# print(type(querypoint))
+# cProfile.runctx('trilat.query_radius(querypoint, r=0.07)', globals(), locals(), "Profile.prof")
+
+cProfile.runctx('timeit.timeit(lambda: brute.radius_neighbors(querypoint, 0.07), number=500)', globals(), locals(), "BFProfile.prof")
+cProfile.runctx('timeit.timeit(lambda: tree.query_radius(querypoint, r=0.07), number=500)', globals(), locals(), "KDProfile.prof")
+cProfile.runctx('timeit.timeit(lambda: trilat.query_radius(querypoint, r=0.07), number=500)', globals(), locals(), "TRProfile.prof")
+cProfile.runctx('timeit.timeit(lambda: trilat.query_radius_t3(querypoint, r=0.07), number=500)', globals(), locals(), "T3Profile.prof")
+
+s = pstats.Stats("BFProfile.prof")
+s.strip_dirs().sort_stats("cumtime").print_stats()
+
+s = pstats.Stats("KDProfile.prof")
+s.strip_dirs().sort_stats("cumtime").print_stats()
+
+s = pstats.Stats("TRProfile.prof")
+s.strip_dirs().sort_stats("cumtime").print_stats()
+
+s = pstats.Stats("T3Profile.prof")
+s.strip_dirs().sort_stats("cumtime").print_stats()
\ No newline at end of file
diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index a3cf7f4bf1d72..0aad03de94f9d 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -751,6 +751,53 @@ def haversine_distances(X, Y=None):
     from sklearn.neighbors import DistanceMetric
     return DistanceMetric.get_metric('haversine').pairwise(X, Y)
 
+def geodesic_distances(X, Y=None):
+    """Compute the Geodesic distance between samples in X and Y.
+
+    The geodesic distance is an accurate measure of the distance between
+        two points on the earth, using an ellipsoidal model of the earth
+        which is more accurate than the spherical model used in Haversine.
+
+        The trade-off is that no closed form calculation exists, so the
+        process is iterative, using numeric techniques until a specific
+        accuracy is reached.
+
+        See the geopy.distance.geodesic code for implementation details.
+
+    Parameters
+    ----------
+    X : array-like of shape (n_samples_X, 2)
+
+    Y : array-like of shape (n_samples_Y, 2), default=None
+
+    Returns
+    -------
+    distance : ndarray of shape (n_samples_X, n_samples_Y)
+
+    Notes
+    -----
+    This will be significantly slower than the Haversine distance and
+    should only be used when more accuracy is needed.
+
+    Unlike Haversine, which requires input in radians, the geodesic
+    implementation works on (latitude, longitude) pairs.
+
+    Examples
+    --------
+    We want to calculate the distance between the Ezeiza Airport
+    (Buenos Aires, Argentina) and the Charles de Gaulle Airport (Paris,
+    France).
+
+    >>> from sklearn.metrics.pairwise import geodesic_distances
+    >>> bsas = [-34.83333, -58.5166646]
+    >>> paris = [49.0083899664, 2.53844117956]
+    >>> result = geodesic_distances([bsas, paris])
+    >>> result
+    array([[    0.        , 11099.54035582],
+           [11099.54035582,     0.        ]])
+    """
+    from sklearn.neighbors import DistanceMetric
+    return DistanceMetric.get_metric('geodesic').pairwise(X, Y)
 
 @_deprecate_positional_args
 def manhattan_distances(X, Y=None, *, sum_over_features=True):
@@ -1340,6 +1387,7 @@ def chi2_kernel(X, Y=None, gamma=1.):
     'manhattan': manhattan_distances,
     'precomputed': None,  # HACK: precomputed is always allowed, never called
     'nan_euclidean': nan_euclidean_distances,
+    'geodesic': geodesic_distances,
 }
 
 
@@ -1359,6 +1407,7 @@ def distance_metrics():
     'cosine'        metrics.pairwise.cosine_distances
     'euclidean'     metrics.pairwise.euclidean_distances
     'haversine'     metrics.pairwise.haversine_distances
+    'geodesic'      metrics.pairwise.geodesic_distances
     'l1'            metrics.pairwise.manhattan_distances
     'l2'            metrics.pairwise.euclidean_distances
     'manhattan'     metrics.pairwise.manhattan_distances
@@ -1440,7 +1489,7 @@ def _pairwise_callable(X, Y, metric, force_all_finite=True, **kwds):
                   'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto',
                   'russellrao', 'seuclidean', 'sokalmichener',
                   'sokalsneath', 'sqeuclidean', 'yule', "wminkowski",
-                  'nan_euclidean', 'haversine']
+                  'nan_euclidean', 'haversine', 'geodesic']
 
 _NAN_METRICS = ['nan_euclidean']
 
@@ -1694,6 +1743,9 @@ def pairwise_distances(X, Y=None, metric="euclidean", *, n_jobs=None,
       See the documentation for scipy.spatial.distance for details on these
       metrics. These metrics do not support sparse matrix inputs.
 
+    - From geopy.distance: ['geodesic']
+      See the documentation for geopy for details on this metric.
+
     Note that in the case of 'cityblock', 'cosine' and 'euclidean' (which are
     valid scipy.spatial.distance metrics), the scikit-learn implementation
     will be used, which is faster and has support for sparse matrices (except
diff --git a/sklearn/neighbors/_ball_tree.pyx b/sklearn/neighbors/_ball_tree.pyx
index 81ce9606f7b80..f9ee57aa43d7b 100644
--- a/sklearn/neighbors/_ball_tree.pyx
+++ b/sklearn/neighbors/_ball_tree.pyx
@@ -19,7 +19,8 @@ VALID_METRICS = ['EuclideanDistance', 'SEuclideanDistance',
                  'DiceDistance', 'KulsinskiDistance',
                  'RogersTanimotoDistance', 'RussellRaoDistance',
                  'SokalMichenerDistance', 'SokalSneathDistance',
-                 'PyFuncDistance', 'HaversineDistance']
+                 'PyFuncDistance', 'HaversineDistance',
+                 'GeodesicDistance']
 
 
 include "_binary_tree.pxi"
diff --git a/sklearn/neighbors/_dist_metrics.pyx b/sklearn/neighbors/_dist_metrics.pyx
index 8bee948eeaeba..de9a378cf384a 100755
--- a/sklearn/neighbors/_dist_metrics.pyx
+++ b/sklearn/neighbors/_dist_metrics.pyx
@@ -4,6 +4,7 @@
 #cython: cdivision=True
 
 # By Jake Vanderplas (2013) <jakevdp@cs.washington.edu>
+# Geodesic hack by Chip Lynch (2021) <chip.lynch@louisville.edu> #needswork
 # written for the scikit-learn project
 # License: BSD
 
@@ -11,6 +12,7 @@ import numpy as np
 cimport numpy as np
 np.import_array()  # required in order to use C-API
 
+from geopy.distance import geodesic
 
 ######################################################################
 # Numpy 1.3-1.4 compatibility utilities
@@ -86,7 +88,9 @@ METRIC_MAPPING = {'euclidean': EuclideanDistance,
                   'sokalmichener': SokalMichenerDistance,
                   'sokalsneath': SokalSneathDistance,
                   'haversine': HaversineDistance,
-                  'pyfunc': PyFuncDistance}
+                  'pyfunc': PyFuncDistance,
+                  'geodesic': GeodesicDistance,
+                  'vincenty': GeodesicDistance}
 
 
 def get_valid_metric_ids(L):
@@ -410,6 +414,31 @@ cdef class DistanceMetric:
                        get_memview_DTYPE_2D(Darr))
         return Darr
 
+#------------------------------------------------------------
+# Geodesic Distance
+#  Accurate distance between two points on earth
+#     modeled as an Ellipsoid
+#     see HaversineDistance for a Spheroid earth distance
+cdef class GeodesicDistance(DistanceMetric):
+    r""" Geodesic Distance metric
+
+    Imported from geopy, see:
+    https://geopy.readthedocs.io/en/stable/#module-geopy.distance
+    """
+
+    # Late import to only happen if needed
+    from geopy.distance import geodesic
+
+    def __init__(self):
+        self.p = 2
+    
+    cdef DTYPE_t dist(self, DTYPE_t* x1, DTYPE_t* x2,
+                            ITYPE_t size) nogil except -1:
+        if size != 2:
+            with gil:
+                raise ValueError("Geodesic Distance takes only 2-dimensions (Latitude, Longitude)")
+        with gil:
+            return geodesic((x1[0], x1[1]),(x2[0], x2[1])).meters
 
 #------------------------------------------------------------
 # Euclidean Distance
diff --git a/sklearn/neighbors/_kd_tree.pyx b/sklearn/neighbors/_kd_tree.pyx
index bc1ab764a6fcf..d6c0c94a2d57a 100644
--- a/sklearn/neighbors/_kd_tree.pyx
+++ b/sklearn/neighbors/_kd_tree.pyx
@@ -12,7 +12,8 @@ __all__ = ['KDTree']
 DOC_DICT = {'BinaryTree': 'KDTree', 'binary_tree': 'kd_tree'}
 
 VALID_METRICS = ['EuclideanDistance', 'ManhattanDistance',
-                 'ChebyshevDistance', 'MinkowskiDistance']
+                 'ChebyshevDistance', 'MinkowskiDistance',
+                 'GeodesicDistance']
 
 
 include "_binary_tree.pxi"
diff --git a/sklearn/neighbors/_trilateration.pyx b/sklearn/neighbors/_trilateration.pyx
index bc1dc1aa53de3..73f2efec7c5ea 100644
--- a/sklearn/neighbors/_trilateration.pyx
+++ b/sklearn/neighbors/_trilateration.pyx
@@ -45,7 +45,8 @@ __all__ = ['TrilaterationIndex']
 DOC_DICT = {'TrilaterationIndex': 'TrilaterationIndex'}
 
 # Start simple:
-VALID_METRICS = ['EuclideanDistance']
+VALID_METRICS = ['EuclideanDistance', 'HaversineDistance',
+                 'GeodesicDistance']
 
 include "_helpers.pxi"
 
@@ -95,8 +96,7 @@ cdef class TrilaterationIndex:
         n_features = self.data_arr.shape[1]
 
         self.dist_metric = DistanceMetric.get_metric(metric, **kwargs)
-        # self.euclidean = (self.dist_metric.__class__.__name__
-        #                   == 'EuclideanDistance')
+
         metric = self.dist_metric.__class__.__name__
         if metric not in VALID_METRICS:
             raise ValueError('metric {metric} is not valid for '
@@ -208,7 +208,7 @@ cdef class TrilaterationIndex:
         cdef NeighborsHeap heap = NeighborsHeap(X.shape[0], k)
 
         # Establish the distances from the query point to the reference points
-        cdef np.ndarray q_dists
+        cdef np.ndarray[DTYPE_t, ndim=2] q_dists
         q_dists = self.dist_metric.pairwise(X, self.ref_points_arr)
 
         # cdef DTYPE_t[:, ::1] Xarr

From 1f5257d802903113dfd05d2b38709e298c212b03 Mon Sep 17 00:00:00 2001
From: Chip Lynch <chip@chiplynch.com>
Date: Thu, 11 Mar 2021 23:30:54 -0800
Subject: [PATCH 15/19] Geodesic benchmarks look goood!

---
 perftest_geodesic.py                 | 35 ++++++++++++++++-----------
 sklearn/neighbors/_dist_metrics.pyx  |  2 +-
 sklearn/neighbors/_trilateration.pyx | 36 +++++++++++++++++-----------
 3 files changed, 44 insertions(+), 29 deletions(-)

diff --git a/perftest_geodesic.py b/perftest_geodesic.py
index 6ecc75d1f3ea8..0080f00aa6a23 100644
--- a/perftest_geodesic.py
+++ b/perftest_geodesic.py
@@ -27,6 +27,9 @@
         # print(row)
         samples.append([row['Latitude'], row['Longitude']])
 
+print(min([x[0] for x in samples]), max([x[0] for x in samples]))
+print(min([x[1] for x in samples]), max([x[1] for x in samples]))
+
 with open('/home/chipmonkey/repos/TrilaterationIndex/data/ref_points.csv') as csvfile:
     reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
     for row in reader:
@@ -38,6 +41,11 @@
 
 print(f"{time.time() - start} seconds since start")
 
+from sklearn.metrics.pairwise import geodesic_distances
+bsas = [-34.83333, -58.5166646]
+paris = [49.0083899664, 2.53844117956]
+result = geodesic_distances([bsas, paris])
+print(f"{result} meters between bsas and paris")
 
 brute = NearestNeighbors(n_neighbors=1000, radius=0.07, algorithm='brute', metric='geodesic')
 brute.fit(samples)
@@ -85,30 +93,29 @@
 # print(f"timing brute force:")
 # cProfile.runctx('timeit.timeit(lambda: brute.kneighbors(querypoint, 100), number=20)', globals(), locals(), "QBFProfile.prof")
 
-print("timing kd tree")
-cProfile.runctx('timeit.timeit(lambda: tree.query(querypoint, k=100), number=20)', globals(), locals(), "QKDProfile_geo.prof")
-
+# print("timing kd tree")
+# cProfile.runctx('timeit.timeit(lambda: tree.query(querypoint, k=100), number=20)', globals(), locals(), "QKDProfile_geo.prof")
 
-print("timing trilat.query")
-cProfile.runctx('timeit.timeit(lambda: trilat.query(querypoint, k=100), number=20)', globals(), locals(), "QTRProfile_geo.prof")
+# print("timing trilat.query")
+# cProfile.runctx('timeit.timeit(lambda: trilat.query(querypoint, k=100), number=20)', globals(), locals(), "QTRProfile_geo.prof")
 
-print("timing query_expand")
-cProfile.runctx('timeit.timeit(lambda: trilat.query_expand(querypoint, k=100, mfudge=5, miter=20, sscale=2), number=20)', globals(), locals(), "QT3Profile_geo.prof")
+# print("timing query_expand")
+# cProfile.runctx('timeit.timeit(lambda: trilat.query_expand(querypoint, k=100, mfudge=5, miter=20, sscale=2), number=20)', globals(), locals(), "QT3Profile_geo.prof")
 
-print("timing query_expand_2")
-cProfile.runctx('timeit.timeit(lambda: trilat.query_expand_2(querypoint, k=100, mfudge=5, miter=20, sscale=2), number=20)', globals(), locals(), "QT4Profile_geo.prof")
+# print("timing query_expand_2")
+# cProfile.runctx('timeit.timeit(lambda: trilat.query_expand_2(querypoint, k=100, mfudge=5, miter=20, sscale=2), number=20)', globals(), locals(), "QT4Profile_geo.prof")
 
-s = pstats.Stats("QBFProfile.prof")
+s = pstats.Stats("QBFProfile_geo.prof")
 s.strip_dirs().sort_stats("cumtime").print_stats()
 
-s = pstats.Stats("QKDProfile.prof")
+s = pstats.Stats("QKDProfile_geo.prof")
 s.strip_dirs().sort_stats("cumtime").print_stats()
 
-s = pstats.Stats("QTRProfile.prof")
+s = pstats.Stats("QTRProfile_geo.prof")
 s.strip_dirs().sort_stats("cumtime").print_stats()
 
-s = pstats.Stats("QT3Profile.prof")
+s = pstats.Stats("QT3Profile_geo.prof")
 s.strip_dirs().sort_stats("cumtime").print_stats()
 
-s = pstats.Stats("QT4Profile.prof")
+s = pstats.Stats("QT4Profile_geo.prof")
 s.strip_dirs().sort_stats("cumtime").print_stats()
\ No newline at end of file
diff --git a/sklearn/neighbors/_dist_metrics.pyx b/sklearn/neighbors/_dist_metrics.pyx
index de9a378cf384a..e466e435b8910 100755
--- a/sklearn/neighbors/_dist_metrics.pyx
+++ b/sklearn/neighbors/_dist_metrics.pyx
@@ -438,7 +438,7 @@ cdef class GeodesicDistance(DistanceMetric):
             with gil:
                 raise ValueError("Geodesic Distance takes only 2-dimensions (Latitude, Longitude)")
         with gil:
-            return geodesic((x1[0], x1[1]),(x2[0], x2[1])).meters
+            return geodesic((x1[0], x1[1]),(x2[0], x2[1])).kilometers
 
 #------------------------------------------------------------
 # Euclidean Distance
diff --git a/sklearn/neighbors/_trilateration.pyx b/sklearn/neighbors/_trilateration.pyx
index 73f2efec7c5ea..9f889ea029ffa 100644
--- a/sklearn/neighbors/_trilateration.pyx
+++ b/sklearn/neighbors/_trilateration.pyx
@@ -61,6 +61,8 @@ cdef class TrilaterationIndex:
     cdef np.ndarray r_indexes
     cdef np.ndarray r_distances
 
+    cdef char* metric
+
     cdef readonly ITYPE_t[:, ::1] r_indexes_mv
     cdef readonly DTYPE_t[:, ::1] r_distances_mv
 
@@ -80,18 +82,6 @@ cdef class TrilaterationIndex:
     def __init__(self, data, ref=None, metric='euclidean', **kwargs):
         self.data_arr = check_array(data, dtype=DTYPE, order='C')
 
-        # Ref points can be passed in
-        # Otherwise they are generated based on input data
-        if ref is None:
-            ref = self._create_ref_points()
-            # print("generated synthetic reference points:")
-            # print(ref)
-
-        self.ref_points_arr = check_array(ref, dtype=DTYPE, order='C')
-
-        if self.data_arr.shape[1] != self.ref_points_arr.shape[1]:
-            raise ValueError("Data and reference points must share same dimension")
-
         n_samples = self.data_arr.shape[0]
         n_features = self.data_arr.shape[1]
 
@@ -104,8 +94,20 @@ cdef class TrilaterationIndex:
                                                    **DOC_DICT))
         self.dist_metric._validate_data(self.data_arr)
 
+        # Ref points can be passed in
+        # Otherwise they are generated based on input data
+        if ref is None:
+            ref = self._create_ref_points()
+            # print("generated synthetic reference points:")
+            # print(ref)
+
+        self.ref_points_arr = check_array(ref, dtype=DTYPE, order='C')
+
         self.ref_dists = self.dist_metric.pairwise(self.data_arr, self.ref_points_arr)
 
+        if self.data_arr.shape[1] != self.ref_points_arr.shape[1]:
+            raise ValueError("Data and reference points must share same dimension")
+
         # allocate arrays for storage
         self.idx_array_arr = np.arange(n_samples, dtype=ITYPE)
 
@@ -140,11 +142,17 @@ cdef class TrilaterationIndex:
 
         For the love of all that is holy, make this better.
         """
-
         cdef DTYPE_t MAX_REF = 9999.0
 
         self.ndims = self.data_arr.shape[1]
-        refs = [[0]*i+[MAX_REF]*(self.ndims-i) for i in range(self.ndims)]
+
+        print(f"self.dist_metric.__class__.__name__: {self.dist_metric.__class__.__name__}")
+
+        if self.dist_metric.__class__.__name__ == 'GeodesicDistance' or \
+                self.dist_metric.__class__.__name__  == 'HaversineDistance':
+            refs = [[90, 0], [38.26, -85.76], [-19.22, 159.93]]
+        else:
+            refs = [[0]*i+[MAX_REF]*(self.ndims-i) for i in range(self.ndims)]
         return np.asarray(refs)
 
 

From 6e12412731dc61648af5ffa9c6af4f399e83c418 Mon Sep 17 00:00:00 2001
From: Chip Lynch <chip@chiplynch.com>
Date: Sat, 13 Mar 2021 12:23:49 -0800
Subject: [PATCH 16/19] Performance tests working for geodesic

---
 perftest_geodesic.py                 | 16 ++++++----------
 perftest_query.py                    | 10 ++++------
 sklearn/neighbors/_trilateration.pyx |  9 +++------
 3 files changed, 13 insertions(+), 22 deletions(-)

diff --git a/perftest_geodesic.py b/perftest_geodesic.py
index 0080f00aa6a23..0cdc1909a8b2b 100644
--- a/perftest_geodesic.py
+++ b/perftest_geodesic.py
@@ -64,34 +64,30 @@
 # t = timeit.timeit(lambda: trilat.query_radius(querypoint, r=0.07), number=500)
 # print(f"Trilat 500x single point time: {t}")
 tr = trilat.query(querypoint, k=100)
-print(f"trilat query_radius: {tr} ({len(tr)}) - at time {time.time() - start} seconds")
+print(f"trilat.query: {tr} ({len(tr)}) - at time {time.time() - start} seconds")
 
 
 t3 = trilat.query_expand(querypoint, k=100)
-print(f"trilat query_radius_t3: {t3} ({len(t3)}) - at time {time.time() - start} seconds")
+print(f"trilat.query_expand (t3): {t3} ({len(t3)}) - at time {time.time() - start} seconds")
 
 t4 = trilat.query_expand_2(querypoint, k=100)
-print(f"trilat query_expand_2: {t4} ({len(t4)}) - at time {time.time() - start} seconds")
+print(f"trilat.query_expand_2 (t4): {t4} ({len(t4)}) - at time {time.time() - start} seconds")
 
 print(f"length match 1: {len(np.intersect1d(tq[1], tr[1]))}")
 print(f"length match 2: {len(np.intersect1d(tq[1], t3[1]))}")
 print(f"length match 4: {len(np.intersect1d(tq[1], t4[1]))}")
 
-# exit()
-
 print(f"setdiff: {np.setdiff1d(tq[0], t3)}")
 if 18 in tq[0]:
     print("18 in tq")
 if 18 in t3[1]:
     print("18 in t3")
 
+# exit()
 
-
-# print(type(querypoint))
-# cProfile.runctx('trilat.query_radius(querypoint, r=0.07)', globals(), locals(), "Profile.prof")
-
+# uncomment to rerun, otherwise just print results:
 # print(f"timing brute force:")
-# cProfile.runctx('timeit.timeit(lambda: brute.kneighbors(querypoint, 100), number=20)', globals(), locals(), "QBFProfile.prof")
+# cProfile.runctx('timeit.timeit(lambda: brute.kneighbors(querypoint, 100), number=20)', globals(), locals(), "QBFProfile_geo.prof")
 
 # print("timing kd tree")
 # cProfile.runctx('timeit.timeit(lambda: tree.query(querypoint, k=100), number=20)', globals(), locals(), "QKDProfile_geo.prof")
diff --git a/perftest_query.py b/perftest_query.py
index ca4cefbeb0434..53d951ece0ab5 100644
--- a/perftest_query.py
+++ b/perftest_query.py
@@ -80,18 +80,16 @@
 # print(type(querypoint))
 # cProfile.runctx('trilat.query_radius(querypoint, r=0.07)', globals(), locals(), "Profile.prof")
 
-# print(f"timing brute force:")
-# cProfile.runctx('timeit.timeit(lambda: brute.kneighbors(querypoint, 100), number=500)', globals(), locals(), "QBFProfile.prof")
-print("running one kd tree")
-tree.query(querypoint, k=100)
+print(f"timing brute force:")
+cProfile.runctx('timeit.timeit(lambda: brute.kneighbors(querypoint, 100), number=500)', globals(), locals(), "QBFProfile.prof")
 
 print("timing kd tree")
 # cProfile.runctx('timeit.timeit(lambda: tree.query(querypoint, k=100), number=500)', globals(), locals(), "QKDProfile.prof")
 cProfile.runctx('timeit.timeit(lambda: tree.query(querypoint, k=100), number=20)', globals(), locals(), "QKDProfile.prof")
 
 
-# print("timing trilat.query")
-# cProfile.runctx('timeit.timeit(lambda: trilat.query(querypoint, k=100), number=500)', globals(), locals(), "QTRProfile.prof")
+print("timing trilat.query")
+cProfile.runctx('timeit.timeit(lambda: trilat.query(querypoint, k=100), number=500)', globals(), locals(), "QTRProfile.prof")
 
 print("timing query_expand")
 cProfile.runctx('timeit.timeit(lambda: trilat.query_expand(querypoint, k=100, mfudge=5, miter=20, sscale=2), number=500)', globals(), locals(), "QT3Profile.prof")
diff --git a/sklearn/neighbors/_trilateration.pyx b/sklearn/neighbors/_trilateration.pyx
index 9f889ea029ffa..a844d269ce991 100644
--- a/sklearn/neighbors/_trilateration.pyx
+++ b/sklearn/neighbors/_trilateration.pyx
@@ -27,8 +27,6 @@ from collections import Counter
 from functools import reduce
 from libc.math cimport fabs, ceil
 
-from scipy.spatial.distance import cdist
-
 from ..utils import (
     check_array
 )
@@ -227,8 +225,7 @@ cdef class TrilaterationIndex:
         cdef DTYPE_t best_dist, test_dist
 
         best_idx = _find_nearest_sorted_2D(self.distances, q_dists[0,0])
-        # best_dist = self.dist_metric.dist(self.data[best_idx,:], &Xarr, X.shape[1])
-        best_dist = cdist([self.data_arr[self.idx_array_arr[best_idx],:]], X)
+        best_dist = self.dist_metric.pairwise([self.data_arr[self.idx_array_arr[best_idx],:]], X)
         # heap.push(0, best_dist, best_idx)
 
         # Populate the heap using 2k elements; k on each side of our first guess:
@@ -236,7 +233,7 @@ cdef class TrilaterationIndex:
         high_idx = min(best_idx + k, self.distances.shape[0])
         for i in range(low_idx, high_idx + 1):
             if i < self.distances.shape[0]:
-                test_dist = cdist([self.data_arr[self.idx_array_arr[i],:]], X)
+                test_dist = self.dist_metric.pairwise([self.data_arr[self.idx_array_arr[i],:]], X)
                 heap.push(0, test_dist, self.idx_array_arr[i])
 
 
@@ -283,7 +280,7 @@ cdef class TrilaterationIndex:
                     break
             
             if sufficient:
-                test_dist = cdist([self.data_arr[self.idx_array[test_idx],:]], X)
+                test_dist = self.dist_metric.pairwise([self.data_arr[self.idx_array[test_idx],:]], X)
                 # print(f"{test_idx} is sufficient... test_dist is: {test_dist}")
                 if test_dist < heap.largest(0):
                     heap.push(0, test_dist, self.idx_array[test_idx])

From 6d8cc746f10a34ccf465831379ff0050ffc09f75 Mon Sep 17 00:00:00 2001
From: Chip Lynch <chip@chiplynch.com>
Date: Sat, 13 Mar 2021 12:27:33 -0800
Subject: [PATCH 17/19] Moved perftest scripts to Trilateration project

---
 perftest_geodesic.py | 117 -------------------------------------------
 perftest_query.py    | 113 -----------------------------------------
 perftest_radius.py   |  84 -------------------------------
 3 files changed, 314 deletions(-)
 delete mode 100644 perftest_geodesic.py
 delete mode 100644 perftest_query.py
 delete mode 100644 perftest_radius.py

diff --git a/perftest_geodesic.py b/perftest_geodesic.py
deleted file mode 100644
index 0cdc1909a8b2b..0000000000000
--- a/perftest_geodesic.py
+++ /dev/null
@@ -1,117 +0,0 @@
-import csv
-import sklearn
-import numpy as np
-import timeit
-import time
-
-from scipy.spatial.distance import cdist
-from geopy.distance import geodesic
-
-
-import pstats, cProfile
-import pyximport
-pyximport.install
-
-import sklearn
-
-start = time.time()
-
-querypoint = np.asarray([[38.25, -85.50]])
-
-samples = []
-refpoints = []
-
-with open('/home/chipmonkey/repos/TrilaterationIndex/data/lat_long_synthetic.csv') as csvfile:
-    reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
-    for row in reader:
-        # print(row)
-        samples.append([row['Latitude'], row['Longitude']])
-
-print(min([x[0] for x in samples]), max([x[0] for x in samples]))
-print(min([x[1] for x in samples]), max([x[1] for x in samples]))
-
-with open('/home/chipmonkey/repos/TrilaterationIndex/data/ref_points.csv') as csvfile:
-    reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
-    for row in reader:
-        print(row)
-        refpoints.append([row['Latitude'], row['Longitude']])
-
-from sklearn.neighbors import NearestNeighbors, KDTree
-from sklearn.neighbors import TrilaterationIndex
-
-print(f"{time.time() - start} seconds since start")
-
-from sklearn.metrics.pairwise import geodesic_distances
-bsas = [-34.83333, -58.5166646]
-paris = [49.0083899664, 2.53844117956]
-result = geodesic_distances([bsas, paris])
-print(f"{result} meters between bsas and paris")
-
-brute = NearestNeighbors(n_neighbors=1000, radius=0.07, algorithm='brute', metric='geodesic')
-brute.fit(samples)
-BF = brute.kneighbors(querypoint, 100)
-print(f"brute force results: {BF[1][0]} ({len(BF[1][0])}) - at time {time.time() - start} seconds")
-
-
-tree = KDTree(samples, metric='geodesic')
-# t = timeit.timeit(lambda: tree.query_radius(querypoint, r=0.07), number=500)
-# print(f"KDTree 500x single point time: {t}")
-tq = tree.query(querypoint, k=100)
-print(f"tree query results: {tq} ({len(tq[0])}) - at time {time.time() - start} seconds")
-
-
-trilat = TrilaterationIndex(samples, metric='geodesic')
-# t = timeit.timeit(lambda: trilat.query_radius(querypoint, r=0.07), number=500)
-# print(f"Trilat 500x single point time: {t}")
-tr = trilat.query(querypoint, k=100)
-print(f"trilat.query: {tr} ({len(tr)}) - at time {time.time() - start} seconds")
-
-
-t3 = trilat.query_expand(querypoint, k=100)
-print(f"trilat.query_expand (t3): {t3} ({len(t3)}) - at time {time.time() - start} seconds")
-
-t4 = trilat.query_expand_2(querypoint, k=100)
-print(f"trilat.query_expand_2 (t4): {t4} ({len(t4)}) - at time {time.time() - start} seconds")
-
-print(f"length match 1: {len(np.intersect1d(tq[1], tr[1]))}")
-print(f"length match 2: {len(np.intersect1d(tq[1], t3[1]))}")
-print(f"length match 4: {len(np.intersect1d(tq[1], t4[1]))}")
-
-print(f"setdiff: {np.setdiff1d(tq[0], t3)}")
-if 18 in tq[0]:
-    print("18 in tq")
-if 18 in t3[1]:
-    print("18 in t3")
-
-# exit()
-
-# uncomment to rerun, otherwise just print results:
-# print(f"timing brute force:")
-# cProfile.runctx('timeit.timeit(lambda: brute.kneighbors(querypoint, 100), number=20)', globals(), locals(), "QBFProfile_geo.prof")
-
-# print("timing kd tree")
-# cProfile.runctx('timeit.timeit(lambda: tree.query(querypoint, k=100), number=20)', globals(), locals(), "QKDProfile_geo.prof")
-
-# print("timing trilat.query")
-# cProfile.runctx('timeit.timeit(lambda: trilat.query(querypoint, k=100), number=20)', globals(), locals(), "QTRProfile_geo.prof")
-
-# print("timing query_expand")
-# cProfile.runctx('timeit.timeit(lambda: trilat.query_expand(querypoint, k=100, mfudge=5, miter=20, sscale=2), number=20)', globals(), locals(), "QT3Profile_geo.prof")
-
-# print("timing query_expand_2")
-# cProfile.runctx('timeit.timeit(lambda: trilat.query_expand_2(querypoint, k=100, mfudge=5, miter=20, sscale=2), number=20)', globals(), locals(), "QT4Profile_geo.prof")
-
-s = pstats.Stats("QBFProfile_geo.prof")
-s.strip_dirs().sort_stats("cumtime").print_stats()
-
-s = pstats.Stats("QKDProfile_geo.prof")
-s.strip_dirs().sort_stats("cumtime").print_stats()
-
-s = pstats.Stats("QTRProfile_geo.prof")
-s.strip_dirs().sort_stats("cumtime").print_stats()
-
-s = pstats.Stats("QT3Profile_geo.prof")
-s.strip_dirs().sort_stats("cumtime").print_stats()
-
-s = pstats.Stats("QT4Profile_geo.prof")
-s.strip_dirs().sort_stats("cumtime").print_stats()
\ No newline at end of file
diff --git a/perftest_query.py b/perftest_query.py
deleted file mode 100644
index 53d951ece0ab5..0000000000000
--- a/perftest_query.py
+++ /dev/null
@@ -1,113 +0,0 @@
-import csv
-import sklearn
-import numpy as np
-import timeit
-import time
-
-from scipy.spatial.distance import cdist
-
-import pstats, cProfile
-import pyximport
-pyximport.install
-
-import sklearn
-
-start = time.time()
-
-querypoint = np.asarray([[38.25, -85.50]])
-
-samples = []
-refpoints = []
-
-with open('/home/chipmonkey/repos/TrilaterationIndex/data/lat_long_synthetic.csv') as csvfile:
-    reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
-    for row in reader:
-        # print(row)
-        samples.append([row['Latitude'], row['Longitude']])
-
-with open('/home/chipmonkey/repos/TrilaterationIndex/data/ref_points.csv') as csvfile:
-    reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
-    for row in reader:
-        print(row)
-        refpoints.append([row['Latitude'], row['Longitude']])
-
-from sklearn.neighbors import NearestNeighbors, KDTree
-from sklearn.neighbors import TrilaterationIndex
-
-print(f"{time.time() - start} seconds since start")
-
-
-brute = NearestNeighbors(n_neighbors=1000, radius=0.07, algorithm='brute', metric='euclidean')
-brute.fit(samples)
-BF = brute.kneighbors(querypoint, 100)
-print(f"brute force results: {BF[1][0]} ({len(BF[1][0])}) - at time {time.time() - start} seconds")
-
-
-tree = KDTree(samples, metric='euclidean')
-# t = timeit.timeit(lambda: tree.query_radius(querypoint, r=0.07), number=500)
-# print(f"KDTree 500x single point time: {t}")
-tq = tree.query(querypoint, k=100)
-print(f"tree query results: {tq} ({len(tq[0])}) - at time {time.time() - start} seconds")
-
-
-trilat = TrilaterationIndex(samples, metric='euclidean')
-# t = timeit.timeit(lambda: trilat.query_radius(querypoint, r=0.07), number=500)
-# print(f"Trilat 500x single point time: {t}")
-tr = trilat.query(querypoint, k=100)
-print(f"trilat query_radius: {tr} ({len(tr)}) - at time {time.time() - start} seconds")
-
-
-t3 = trilat.query_expand(querypoint, k=100)
-print(f"trilat query_radius_t3: {t3} ({len(t3)}) - at time {time.time() - start} seconds")
-
-t4 = trilat.query_expand_2(querypoint, k=100)
-print(f"trilat query_expand_2: {t4} ({len(t4)}) - at time {time.time() - start} seconds")
-
-print(f"length match 1: {len(np.intersect1d(tq[1], tr[1]))}")
-print(f"length match 2: {len(np.intersect1d(tq[1], t3[1]))}")
-print(f"length match 4: {len(np.intersect1d(tq[1], t4[1]))}")
-
-# exit()
-
-print(f"setdiff: {np.setdiff1d(tq[0], t3)}")
-if 18 in tq[0]:
-    print("18 in tq")
-if 18 in t3[1]:
-    print("18 in t3")
-
-
-
-# print(type(querypoint))
-# cProfile.runctx('trilat.query_radius(querypoint, r=0.07)', globals(), locals(), "Profile.prof")
-
-print(f"timing brute force:")
-cProfile.runctx('timeit.timeit(lambda: brute.kneighbors(querypoint, 100), number=500)', globals(), locals(), "QBFProfile.prof")
-
-print("timing kd tree")
-# cProfile.runctx('timeit.timeit(lambda: tree.query(querypoint, k=100), number=500)', globals(), locals(), "QKDProfile.prof")
-cProfile.runctx('timeit.timeit(lambda: tree.query(querypoint, k=100), number=20)', globals(), locals(), "QKDProfile.prof")
-
-
-print("timing trilat.query")
-cProfile.runctx('timeit.timeit(lambda: trilat.query(querypoint, k=100), number=500)', globals(), locals(), "QTRProfile.prof")
-
-print("timing query_expand")
-cProfile.runctx('timeit.timeit(lambda: trilat.query_expand(querypoint, k=100, mfudge=5, miter=20, sscale=2), number=500)', globals(), locals(), "QT3Profile.prof")
-
-print("timing query_expand_2")
-cProfile.runctx('timeit.timeit(lambda: trilat.query_expand_2(querypoint, k=100, mfudge=5, miter=20, sscale=2), number=500)', globals(), locals(), "QT4Profile.prof")
-
-s = pstats.Stats("QBFProfile.prof")
-s.strip_dirs().sort_stats("cumtime").print_stats()
-
-s = pstats.Stats("QKDProfile.prof")
-s.strip_dirs().sort_stats("cumtime").print_stats()
-
-s = pstats.Stats("QTRProfile.prof")
-s.strip_dirs().sort_stats("cumtime").print_stats()
-
-s = pstats.Stats("QT3Profile.prof")
-s.strip_dirs().sort_stats("cumtime").print_stats()
-
-s = pstats.Stats("QT4Profile.prof")
-s.strip_dirs().sort_stats("cumtime").print_stats()
\ No newline at end of file
diff --git a/perftest_radius.py b/perftest_radius.py
deleted file mode 100644
index 1775ab08b1718..0000000000000
--- a/perftest_radius.py
+++ /dev/null
@@ -1,84 +0,0 @@
-import csv
-import sklearn
-import numpy as np
-import timeit
-
-from scipy.spatial.distance import cdist
-
-querypoint = np.asarray([[38.25, -85.50]])
-
-samples = []
-refpoints = []
-
-with open('/home/chipmonkey/repos/TrilaterationIndex/data/lat_long_synthetic.csv') as csvfile:
-    reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
-    for row in reader:
-        # print(row)
-        samples.append([row['Latitude'], row['Longitude']])
-
-with open('/home/chipmonkey/repos/TrilaterationIndex/data/ref_points.csv') as csvfile:
-    reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
-    for row in reader:
-        print(row)
-        refpoints.append([row['Latitude'], row['Longitude']])
-
-from sklearn.neighbors import NearestNeighbors, KDTree
-from sklearn.neighbors import TrilaterationIndex
-
-brute = NearestNeighbors(n_neighbors=1000, radius=0.07, algorithm='brute')
-brute.fit(samples)
-BF = brute.radius_neighbors(querypoint, 0.1)
-print(f"brute force results: {BF[1][0]} ({len(BF[1][0])})")
-
-
-tree = KDTree(samples)
-# t = timeit.timeit(lambda: tree.query_radius(querypoint, r=0.07), number=500)
-# print(f"KDTree 500x single point time: {t}")
-tq = tree.query_radius(querypoint, r=0.1)
-print(f"tree query results: {tq} ({len(tq[0])})")
-
-
-trilat = TrilaterationIndex(samples)
-# t = timeit.timeit(lambda: trilat.query_radius(querypoint, r=0.07), number=500)
-# print(f"Trilat 500x single point time: {t}")
-tr = trilat.query_radius(querypoint, r=0.1)
-print(f"trilat query_radius: {tr} ({len(tr)})")
-
-
-t3 = trilat.query_radius_t3(querypoint, r=0.1)
-print(f"trilat query_radius_t3: {t3} ({len(t3)})")
-
-print(f"length match 1: {len(np.intersect1d(tq[0], tr))}")
-print(f"length match 2: {len(np.intersect1d(tq[0], t3))}")
-
-print(f"setdiff: {np.setdiff1d(tq[0], t3)}")
-if 18 in tq[0]:
-    print("18 in tq")
-if 18 in t3:
-    print("18 in t3")
-
-
-import pstats, cProfile
-import pyximport
-pyximport.install
-
-import sklearn
-# print(type(querypoint))
-# cProfile.runctx('trilat.query_radius(querypoint, r=0.07)', globals(), locals(), "Profile.prof")
-
-cProfile.runctx('timeit.timeit(lambda: brute.radius_neighbors(querypoint, 0.07), number=500)', globals(), locals(), "BFProfile.prof")
-cProfile.runctx('timeit.timeit(lambda: tree.query_radius(querypoint, r=0.07), number=500)', globals(), locals(), "KDProfile.prof")
-cProfile.runctx('timeit.timeit(lambda: trilat.query_radius(querypoint, r=0.07), number=500)', globals(), locals(), "TRProfile.prof")
-cProfile.runctx('timeit.timeit(lambda: trilat.query_radius_t3(querypoint, r=0.07), number=500)', globals(), locals(), "T3Profile.prof")
-
-s = pstats.Stats("BFProfile.prof")
-s.strip_dirs().sort_stats("cumtime").print_stats()
-
-s = pstats.Stats("KDProfile.prof")
-s.strip_dirs().sort_stats("cumtime").print_stats()
-
-s = pstats.Stats("TRProfile.prof")
-s.strip_dirs().sort_stats("cumtime").print_stats()
-
-s = pstats.Stats("T3Profile.prof")
-s.strip_dirs().sort_stats("cumtime").print_stats()
\ No newline at end of file

From 32c42e7f120c7715d76bac6f16222b06e3a7d5f4 Mon Sep 17 00:00:00 2001
From: Chip Lynch <chip@chiplynch.com>
Date: Mon, 15 Mar 2021 19:40:16 -0700
Subject: [PATCH 18/19] Fix bug that caused segfault

---
 sklearn/neighbors/_trilateration.pyx | 35 ++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/sklearn/neighbors/_trilateration.pyx b/sklearn/neighbors/_trilateration.pyx
index a844d269ce991..2c0538d7f6ee2 100644
--- a/sklearn/neighbors/_trilateration.pyx
+++ b/sklearn/neighbors/_trilateration.pyx
@@ -144,7 +144,7 @@ cdef class TrilaterationIndex:
 
         self.ndims = self.data_arr.shape[1]
 
-        print(f"self.dist_metric.__class__.__name__: {self.dist_metric.__class__.__name__}")
+        # print(f"self.dist_metric.__class__.__name__: {self.dist_metric.__class__.__name__}")
 
         if self.dist_metric.__class__.__name__ == 'GeodesicDistance' or \
                 self.dist_metric.__class__.__name__  == 'HaversineDistance':
@@ -179,8 +179,10 @@ cdef class TrilaterationIndex:
         """
 
         if isinstance(X, list):
+            # print("check 1")
             results = self._query_one(np.asarray(X), k, return_distance)
         elif X.shape[0] == 1:
+            # print("check 2")
             results=self._query_one(X, k, return_distance)
         else:
             raise NotImplementedError("Not yet")
@@ -189,7 +191,6 @@ cdef class TrilaterationIndex:
             #                   return_distance=return_distance,
             #                   sort_results=sort_results) \
             #            for x in X]
-
         return results
 
     cdef _query_one(self, X, k=5,
@@ -202,6 +203,7 @@ cdef class TrilaterationIndex:
                              "use query for multiple records")
 
         if X.shape[X.ndim - 1] != self.data.shape[1]:
+            # print(f"X is : {X}")
             raise ValueError("query data dimension must "
                              "match training data dimension")
         
@@ -209,14 +211,19 @@ cdef class TrilaterationIndex:
             raise ValueError("k must be less than or equal "
                              "to the number of training points")
 
+        # print(f"Check: X is {X}")
         # Can probably improve this - using a heap that allows more than 1 value
         # but we're always only using one here (X.shape[0] must be 1 from above)
         cdef NeighborsHeap heap = NeighborsHeap(X.shape[0], k)
 
+        # print(f"Check - Built NeighborsHeap")
+
         # Establish the distances from the query point to the reference points
         cdef np.ndarray[DTYPE_t, ndim=2] q_dists
         q_dists = self.dist_metric.pairwise(X, self.ref_points_arr)
 
+        # print(f"Check - built q_dists")
+
         # cdef DTYPE_t[:, ::1] Xarr
         # Xarr = get_memview_DTYPE_2D(np.asarray(X))
         # Xarr = np.asarray(X, dtype=DTYPE, order='C')
@@ -228,17 +235,20 @@ cdef class TrilaterationIndex:
         best_dist = self.dist_metric.pairwise([self.data_arr[self.idx_array_arr[best_idx],:]], X)
         # heap.push(0, best_dist, best_idx)
 
+
         # Populate the heap using 2k elements; k on each side of our first guess:
         low_idx = max(best_idx - k, 0)
         high_idx = min(best_idx + k, self.distances.shape[0])
+        # print("Check - ready for loop")
         for i in range(low_idx, high_idx + 1):
+            # print(f"Check - Loop {i}")
             if i < self.distances.shape[0]:
                 test_dist = self.dist_metric.pairwise([self.data_arr[self.idx_array_arr[i],:]], X)
                 heap.push(0, test_dist, self.idx_array_arr[i])
 
-
+        # print("Check - out of loop")
         # Establish bounds between which to search
-        if heap.largest(0) != np.inf:
+        if heap.largest(0) == np.inf:  # This was != before... bug?
             low_idx_possible = 0
             high_idx_possible = self.data_arr.shape[0]
         else:
@@ -249,20 +259,26 @@ cdef class TrilaterationIndex:
         # So we test more than one new point at a time
         test_idx = best_idx
 
+        # print(f"Check - low_idx_possible: {low_idx_possible}, high_idx_possible: {high_idx_possible}")
+
         while True:
+            # print(f"Check - Entering while loop with low_idx: {low_idx}, high_idx: {high_idx}")
             if low_idx <= low_idx_possible and high_idx >= high_idx_possible:
                 break
 
             # Determine whether the next high or low point is a better test:
             lowdelta = fabs(self.distances[low_idx, 0] - q_dists[0, 0])
             highdelta = fabs(self.distances[high_idx, 0] - q_dists[0, 0])
-            if lowdelta <= highdelta and low_idx >= low_idx_possible:
+            if lowdelta <= highdelta and low_idx > low_idx_possible:
+                # print("check a")
                 test_idx = low_idx
                 low_idx = low_idx - 1
-            elif high_idx <= high_idx_possible:
+            elif high_idx < high_idx_possible:
+                # print("check b")
                 test_idx = high_idx
                 high_idx = high_idx + 1
-            elif low_idx >= low_idx_possible:
+            elif low_idx > low_idx_possible:
+                # print("check c")
                 test_idx = low_idx
                 low_idx = low_idx - 1
             else:
@@ -273,6 +289,7 @@ cdef class TrilaterationIndex:
             # Check that all pre-calculated distances are better than best
             sufficient = True
 
+            # print("Check - checking sufficient")
             for d, q in zip(self.distances[test_idx,1:], q_dists[0,1:]):
                 # print(f"testing that: {abs(d-q)} < {heap.largest(0)}")
                 if abs(d-q) > heap.largest(0):
@@ -290,7 +307,7 @@ cdef class TrilaterationIndex:
                     high_idx_possible = _find_nearest_sorted_2D(self.distances, q_dists[0, 0] + heap.largest(0))
 
             continue
-
+        # print("Check - out of while")
         # return (self.idx_array[best_idx], best_dist)
         return heap.get_arrays()
 
@@ -727,8 +744,6 @@ cdef class TrilaterationIndex:
         for i in range(len(dist_results)):
             # print(f"{i} (count: {count}) testing point {approx_idxs[i]} with dist: {dist_results[i]}")
             if dist_results[i] <= r:
-                # if approx_idxs[i] == 18:
-                #     print(f"adding {approx_idxs[i]}")
                 result_arr[count] = approx_idxs[i]
                 count += 1
 

From e3f65f52f42c0bb16e580827f546868b9de0f942 Mon Sep 17 00:00:00 2001
From: Chip Lynch <chip@chiplynch.com>
Date: Mon, 22 Mar 2021 14:31:30 -0700
Subject: [PATCH 19/19] Mostly fixed bug causing query_expand to return no
 results

It's possible that if the initial guess is too low, the
_query_expand function will iterate beyond its MAX_ITER limit
before finding a single approximate match.

This is patched by forcing the initial guess function to report
a minimum radius of 0.5, but this could be problematic in certain
coordinate systems.
---
 sklearn/neighbors/_trilateration.pyx | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/sklearn/neighbors/_trilateration.pyx b/sklearn/neighbors/_trilateration.pyx
index 2c0538d7f6ee2..0cd70497dadff 100644
--- a/sklearn/neighbors/_trilateration.pyx
+++ b/sklearn/neighbors/_trilateration.pyx
@@ -326,10 +326,14 @@ cdef class TrilaterationIndex:
         
         ground_idx = _find_nearest_sorted_2D(self.distances, q_dists[0, 0])
         # ground_idx = np.searchsorted(self.distances, q_dists[0, 0], side="left")
-        low_idx = ground_idx - (k//2)
-        high_idx = ground_idx + ceil(k/2)
+        low_idx = max(ground_idx - k, 0)
+        high_idx = min(ground_idx + k, self.data_arr.shape[0])
+
+        # Values were insanely low - like 1e-07.  For now, forcing min of 0.5.  Consider improvements here.
+        # print(f"""get_start_dist: ({low_idx}, {high_idx}), {self.distances[high_idx, 0]}, {self.distances[low_idx, 0]}
+        #         {self.distances[high_idx, 0] - self.distances[low_idx, 0]}""")
 
-        return(self.distances[high_idx, 0] - self.distances[low_idx, 0])
+        return(min(0.5, self.distances[high_idx, 0] - self.distances[low_idx, 0]))
 
     def query_expand(self, X, k, return_distance=True, sort_results=True,
                      mfudge=5.0, miter=20, sscale=2.0):
@@ -421,12 +425,16 @@ cdef class TrilaterationIndex:
                 if new_radius > too_high:
                     new_radius = (too_high + too_low) / 2
                 radius = new_radius
+                # print(f"too small... new radius: {radius}")
                 # approx_array = self._query_radius_at_least_k(X, radius, k)
                 approx_array = self._query_radius_r0_only(X, radius)
                 approx_count = approx_array.shape[0]
+                # print(f"approx_count: {approx_count}")
                 continue
             elif approx_count > k * MAX_FUDGE:
+                too_high = radius
                 radius = (radius + too_low) / STD_SCALE
+                # print(f"too big... new radius: {radius}")
                 # approx_array = self._query_radius_at_least_k(X, radius, k)
                 approx_array = self._query_radius_r0_only(X, radius)
                 approx_count = approx_array.shape[0]