Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,8 @@ code-analysis:

flake8-diff:
git diff upstream/main -u -- "*.py" | flake8 --diff

install:
. ./venv38/bin/activate ; pip install --verbose --no-build-isolation --editable .
install-test:
python -c "import sklearn; sklearn.show_versions()"
123 changes: 123 additions & 0 deletions debug_query_radius.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# File to debug
# This file should never be published...
# Purge it from git before merging

import csv
import sklearn
import numpy as np

from scipy.spatial.distance import cdist

# samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]
querypoint = [[50, 65]]

samples = []
refpoints = []

with open('/home/chipmonkey/repos/TrilaterationIndex/data/point_sample_10.csv') as csvfile:
reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
for row in reader:
print(row)
samples.append([row['x'], row['y']])

with open('/home/chipmonkey/repos/TrilaterationIndex/data/sample_ref_points.csv') as csvfile:
reader = csv.DictReader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
for row in reader:
print(row)
refpoints.append([row['x'], row['y']])

print(refpoints)

print(samples)
print("Actual distances:")
print(cdist(samples, querypoint))

from sklearn.neighbors import NearestNeighbors, KDTree
from sklearn.neighbors import TrilaterationIndex

tree = KDTree(samples)
tq = tree.query_radius(querypoint, r=20)
print("tree query results:")
print(tq)

neigh = NearestNeighbors(radius=10)
neigh.fit(samples)

print("finding neighbors within radius:")
rng = neigh.radius_neighbors([[44., 44.]])
print("distances:")
print(np.asarray(rng[0][0]))
print("indexes:")
print(list(np.asarray(rng[1][0])))
print("points:")
things = [samples[x] for x in list(np.asarray(rng[1][0]))]
print(things)
# [samples[x] for x in list(np.asarray(rng[1][0]))]
# print(samples[list(np.asarray(rng[1][0]))])

print(type(tree))
print(type(tree.data))
# dist, ind = tree.query
print(np.asarray(tree.node_data)) # Ohhh https://stackoverflow.com/questions/20978377/cython-convert-memory-view-to-numpy-array
print(tree.node_data[0])

ti = TrilaterationIndex(samples, refpoints)



print(np.asarray(ti.ref_points))
print(np.asarray(ti.idx_array))
print("distances:")
print(np.asarray(ti.distances))

print(ti.ref_points.shape[0])
print(ti.ref_points.shape[1])

result1 = ti.query_radius_approx(np.asarray(querypoint), 20)
print(f"query_radius_approx: {result1}")


result2 = ti.query_radius_approx(np.asarray(querypoint), 20)
print(f"query_radius: {result2}")

rq = ti.query_radius(np.asarray(querypoint), 20)
print(f"rq: {rq}")

print("------------")
print("single point tests")
r4 = ti.query(querypoint, 3)
ids, dists = r4
print(f"query: {r4}")
print(f"indexes: {ids}, dists: {dists}")

r3 = ti.query_expand(querypoint, 3)
ids, dists = r3
print(f"query_expand: {r3}")
print(f"indexes: {np.asarray(ids)}", dists)

print("Passed Single Point tests")
print("------------")
print("Starting multi point tests")

querypoints = np.asarray([np.asarray([50, 65]),
np.asarray([55, 70]),
np.asarray([45, 80])])

r4 = ti.query(querypoints, 3)
print(f"query results r4: {r4}")
print(f"r4.shape: {np.asarray(r4).shape}")

ids, dists = r4
print(f"indexes: {ids}, dists: {dists}")

r3 = ti.query_expand(querypoints, 3)
ids, dists = r3
print(f"query_expand: {r3}")
print(f"indexes: {np.asarray(ids)}", dists)

# [(array([[14.30117536, 15.82017495, 16.12663017]]), array([[0, 5, 3]])),
# (array([[16.08773716, 16.85576594, 16.85576594]]), array([[5, 0, 0]])),
# (array([[ 4.37084771, 23.74124599, 23.74124599]]), array([[5, 3, 3]]))]

# cython sklearn/neighbors/_trilateration.pyx -a
# cython sklearn/neighbors/_kd_tree.pyx -a
1 change: 1 addition & 0 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
# is relative to the documentation root, use os.path.abspath to make it
# absolute, like shown here.
sys.path.insert(0, os.path.abspath('sphinxext'))
sys.path.insert(0, os.path.abspath('../venv38/lib/python3.8/site-packages/'))

from github_link import make_linkcode_resolve
import sphinx_gallery
Expand Down
54 changes: 53 additions & 1 deletion sklearn/metrics/pairwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,53 @@ def haversine_distances(X, Y=None):
from sklearn.neighbors import DistanceMetric
return DistanceMetric.get_metric('haversine').pairwise(X, Y)

def geodesic_distances(X, Y=None):
"""Compute the Geodesic distance between samples in X and Y.

The geodesic distance is an accurate measure of the distance between
two points on the earth, using an ellipsoidal model of the earth
which is more accurate than the spherical model used in Haversine.

The trade-off is that no closed form calculation exists, so the
process is iterative, using numeric techniques until a specific
accuracy is reached.

See the geopy.distance.geodesic code for implementation details.

Parameters
----------
X : array-like of shape (n_samples_X, 2)

Y : array-like of shape (n_samples_Y, 2), default=None

Returns
-------
distance : ndarray of shape (n_samples_X, n_samples_Y)

Notes
-----
This will be significantly slower than the Haversine distance and
should only be used when more accuracy is needed.

Unlike Haversine, which requires input in radians, the geodesic
implementation works on (latitude, longitude) pairs.

Examples
--------
We want to calculate the distance between the Ezeiza Airport
(Buenos Aires, Argentina) and the Charles de Gaulle Airport (Paris,
France).

>>> from sklearn.metrics.pairwise import geodesic_distances
>>> bsas = [-34.83333, -58.5166646]
>>> paris = [49.0083899664, 2.53844117956]
>>> result = geodesic_distances([bsas, paris])
>>> result
array([[ 0. , 11099.54035582],
[11099.54035582, 0. ]])
"""
from sklearn.neighbors import DistanceMetric
return DistanceMetric.get_metric('geodesic').pairwise(X, Y)

@_deprecate_positional_args
def manhattan_distances(X, Y=None, *, sum_over_features=True):
Expand Down Expand Up @@ -1340,6 +1387,7 @@ def chi2_kernel(X, Y=None, gamma=1.):
'manhattan': manhattan_distances,
'precomputed': None, # HACK: precomputed is always allowed, never called
'nan_euclidean': nan_euclidean_distances,
'geodesic': geodesic_distances,
}


Expand All @@ -1359,6 +1407,7 @@ def distance_metrics():
'cosine' metrics.pairwise.cosine_distances
'euclidean' metrics.pairwise.euclidean_distances
'haversine' metrics.pairwise.haversine_distances
'geodesic' metrics.pairwise.geodesic_distances
'l1' metrics.pairwise.manhattan_distances
'l2' metrics.pairwise.euclidean_distances
'manhattan' metrics.pairwise.manhattan_distances
Expand Down Expand Up @@ -1440,7 +1489,7 @@ def _pairwise_callable(X, Y, metric, force_all_finite=True, **kwds):
'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto',
'russellrao', 'seuclidean', 'sokalmichener',
'sokalsneath', 'sqeuclidean', 'yule', "wminkowski",
'nan_euclidean', 'haversine']
'nan_euclidean', 'haversine', 'geodesic']

_NAN_METRICS = ['nan_euclidean']

Expand Down Expand Up @@ -1694,6 +1743,9 @@ def pairwise_distances(X, Y=None, metric="euclidean", *, n_jobs=None,
See the documentation for scipy.spatial.distance for details on these
metrics. These metrics do not support sparse matrix inputs.

- From geopy.distance: ['geodesic']
See the documentation for geopy for details on this metric.

Note that in the case of 'cityblock', 'cosine' and 'euclidean' (which are
valid scipy.spatial.distance metrics), the scikit-learn implementation
will be used, which is faster and has support for sparse matrices (except
Expand Down
2 changes: 2 additions & 0 deletions sklearn/neighbors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from ._ball_tree import BallTree
from ._kd_tree import KDTree
from ._trilateration import TrilaterationIndex
from ._dist_metrics import DistanceMetric
from ._graph import kneighbors_graph, radius_neighbors_graph
from ._graph import KNeighborsTransformer, RadiusNeighborsTransformer
Expand All @@ -20,6 +21,7 @@
__all__ = ['BallTree',
'DistanceMetric',
'KDTree',
'TrilaterationIndex',
'KNeighborsClassifier',
'KNeighborsRegressor',
'KNeighborsTransformer',
Expand Down
3 changes: 2 additions & 1 deletion sklearn/neighbors/_ball_tree.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ VALID_METRICS = ['EuclideanDistance', 'SEuclideanDistance',
'DiceDistance', 'KulsinskiDistance',
'RogersTanimotoDistance', 'RussellRaoDistance',
'SokalMichenerDistance', 'SokalSneathDistance',
'PyFuncDistance', 'HaversineDistance']
'PyFuncDistance', 'HaversineDistance',
'GeodesicDistance']


include "_binary_tree.pxi"
Expand Down
31 changes: 30 additions & 1 deletion sklearn/neighbors/_dist_metrics.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
#cython: cdivision=True

# By Jake Vanderplas (2013) <jakevdp@cs.washington.edu>
# Geodesic hack by Chip Lynch (2021) <chip.lynch@louisville.edu> #needswork
# written for the scikit-learn project
# License: BSD

import numpy as np
cimport numpy as np
np.import_array() # required in order to use C-API

from geopy.distance import geodesic

######################################################################
# Numpy 1.3-1.4 compatibility utilities
Expand Down Expand Up @@ -86,7 +88,9 @@ METRIC_MAPPING = {'euclidean': EuclideanDistance,
'sokalmichener': SokalMichenerDistance,
'sokalsneath': SokalSneathDistance,
'haversine': HaversineDistance,
'pyfunc': PyFuncDistance}
'pyfunc': PyFuncDistance,
'geodesic': GeodesicDistance,
'vincenty': GeodesicDistance}


def get_valid_metric_ids(L):
Expand Down Expand Up @@ -410,6 +414,31 @@ cdef class DistanceMetric:
get_memview_DTYPE_2D(Darr))
return Darr

#------------------------------------------------------------
# Geodesic Distance
# Accurate distance between two points on earth
# modeled as an Ellipsoid
# see HaversineDistance for a Spheroid earth distance
cdef class GeodesicDistance(DistanceMetric):
r""" Geodesic Distance metric

Imported from geopy, see:
https://geopy.readthedocs.io/en/stable/#module-geopy.distance
"""

# Late import to only happen if needed
from geopy.distance import geodesic

def __init__(self):
self.p = 2

cdef DTYPE_t dist(self, DTYPE_t* x1, DTYPE_t* x2,
ITYPE_t size) nogil except -1:
if size != 2:
with gil:
raise ValueError("Geodesic Distance takes only 2-dimensions (Latitude, Longitude)")
with gil:
return geodesic((x1[0], x1[1]),(x2[0], x2[1])).kilometers

#------------------------------------------------------------
# Euclidean Distance
Expand Down
Loading