Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions estimate_power_pole_distance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import psycopg2
import json
import sys
import ast
import numpy as np
import scipy.stats as stats
import pylab as pl

# Estimate typical power-pole spacing: for each sampled powerline, measure the
# minimum and maximum pairwise distance between its points (in the units of the
# stored spatial reference, per ST_Distance), then report the averages.

try:
    conn = psycopg2.connect("dbname='gis' user='Munna' host='localhost' password=''")
    cur = conn.cursor()
except psycopg2.Error as error:
    # Nothing below can run without a connection; fail fast instead of
    # continuing and crashing later with a NameError on `cur`
    # (the original bare `except:` swallowed the failure).
    print("I am unable to connect to the database: %s" % error)
    sys.exit(1)

min_distances = []
max_distances = []

get_powerlines_query = "SELECT id as id, properties->>'refs' as ref FROM powerline LIMIT 10"
cur.execute(get_powerlines_query)
powerlines = cur.fetchall()
for powerline in powerlines:
    # `refs` is stored as the text form of a Python list of node osmids.
    nodes_osmids = ast.literal_eval(powerline[1])
    nodes_osmids = tuple(str(i) for i in nodes_osmids)
    nodes_count_query = "SELECT count(DISTINCT id) FROM point WHERE properties->>'osmid' IN %s"
    cur.execute(nodes_count_query, (nodes_osmids, ))
    nodes_count = cur.fetchone()
    # check if we have all the nodes for a powerline in our database
    if nodes_count[0] == len(nodes_osmids):
        nodes_distances_query = '''SELECT MIN(ST_Distance(a.geom, b.geom)),
                                          MAX(ST_Distance(a.geom, b.geom))
                                   FROM
                                        point a,
                                        point b
                                   WHERE a.id IN (SELECT DISTINCT id FROM point where properties->>'osmid' in %s)
                                   AND b.id IN (SELECT DISTINCT id FROM point where properties->>'osmid' in %s)
                                   AND a.id != b.id'''
        cur.execute(nodes_distances_query, (nodes_osmids, nodes_osmids, ))
        distances = cur.fetchone()
        print(distances)
        min_distances.append(distances[0])
        max_distances.append(distances[1])

# Guard against an empty sample: none of the sampled powerlines may have had
# all of their nodes present in the database.
if min_distances:
    # sum()/len() replaces the original reduce(), which was never imported
    # (NameError on Python 3) and needlessly obscure here.
    print("Average minimum distance: ", sum(min_distances) / len(min_distances))
    print("Average maximum distance: ", sum(max_distances) / len(max_distances))
else:
    print("No sampled powerline had all of its nodes in the database.")

# Optional exploratory plotting of the distance distributions, kept for
# reference — uncomment to fit a normal curve and save histograms.

# h = sorted(min_distances)

# fit = stats.norm.pdf(h, np.mean(h), np.std(h)) #this is a fitting indeed

# f = pl.figure()
# pl.plot(h,fit,'-o')
# pl.hist(h,normed=True) #use this to draw histogram of your data
# f.savefig('min.pdf')

# h = sorted(max_distances)

# fit = stats.norm.pdf(h, np.mean(h), np.std(h)) #this is a fitting indeed

# f = pl.figure()
# pl.plot(h,fit,'-o')
# pl.hist(h,normed=True) #use this to draw histogram of your data
# f.savefig('max.pdf')
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""create inferred_powerlines table

Revision ID: 360ac2066ce
Revises: 58ba1b9ec53
Create Date: 2016-08-28 14:40:35.960705

"""

# revision identifiers, used by Alembic.
revision = '360ac2066ce'
down_revision = '58ba1b9ec53'

from alembic import op
import sqlalchemy as sa


def upgrade():
    """Create ``inferred_powerlines`` with the same layout as ``powerline``.

    ``CREATE TABLE ... (LIKE ...)`` copies columns, defaults, constraints
    and indexes, but NOT foreign keys or triggers.
    Refer: http://stackoverflow.com/a/1220942/976880
    """
    ddl = (
        "CREATE table inferred_powerlines("
        "like powerline "
        "including defaults "
        "including constraints "
        "including indexes"
        ")"
    )
    op.execute(ddl)

def downgrade():
    """Drop the table created by :func:`upgrade`."""
    op.drop_table("inferred_powerlines")
104 changes: 104 additions & 0 deletions way_inference/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Way Inference

# How does it work?

The idea was to take basic, simple methods, make them work in coordination, and demonstrate way-inference methods covering the possible scenarios.
> At the moment these scripts cannot perform very good inference. This is more a proof of possibility.


## Basic script

Takes in boundaries to run inference on that region of the map.
Gets clusters of points (discussed [here](#clustering)) and loops over each cluster. Each cluster is possibly a powerline.
For each cluster
* Start inference with a point in the cluster. How we pick a point to start with is discussed [here](#choosing-a-point-to-start-with).
* Fetch the database and choose a [possible next point](#choosing-a-possible-next-point).
* Here we have different options on how to choose a next point. Some points are even ignored during this process (discussed [here](#choosing-a-possible-next-point)).
* Once we have gone through all the points, we will save the points as a line into the database.
* Alternatively to the above point, if we encounter a point that meets a polygon (building), we assume it is the end of the line and save the line to the database.
We will then continue afresh with the remaining points from the cluster, treating them as a new cluster.

As simple as that!

## Clustering

Everything basically starts with clustering.
Given a boundary, we take clusters of points in that region from our database.
We take the points that are not marked as substations or stations or something similar.
We cluster points based on the distance between them.
For the powerlines we had in the database (at least), we calculated (using the `../estimate_power_pole_distance.py` script) that the average maximum distance between two points in a powerline is around 0.1 in postGIS distance units (projected units based on spatial ref - [See more](http://postgis.net/docs/ST_Distance.html)).
So, we cluster points that are in that distance of each other (at least with another point in that cluster).
You could see the `ClusterWrapper` for the code related to this.

## Choosing a point to start with

Within a cluster, we choose the points based on different methods.

### Farthest points

By default, we choose one of the two farthest points.
The distances are calculated and farthest points are fetched from SQL for performance leveraging the [postGIS](http://postgis.net) functions.
The script then takes one of those points as a starting point.

### Points on a substation
**Optional**

Some polygons are tagged with `power: substation` in the OpenStreetMap data to mark substations.
Also, ways that are tagged with `power: line` but are closed are usually substations as well.
If the option to select such points is enabled in the script, the script fetches points (from the cluster) that intersect or lie within such polygons.
This again uses the postGIS functions, and such points are fetched using SQL queries.
The script then takes one of those points as a starting point.
If no such points are found, it falls back to using **Farthest points**.

## Choosing a possible next point

This is one of the critical steps which would make a difference on how the lines are inferred.
A possible next point from a point is done based on different variables.
We have tested these variables separately. They cannot all work together.
However, a few of them can work together.

### Closest point in a ring

By default, we choose the next closest point in a ring.
It is highly possible that a point has neighbours that are too close (in the case of powerlines running in parallel).
We don't want to join those _too-close_ points.
Similarly, there are always points around a point that are too far away.
We used the `../estimate_power_pole_distance.py` script to find the average minimum and maximum distance between adjacent points among the powerlines in our database.
Using these minimum and maximum distances, we set the variables in the `NodeWrapper` and form an SQL query to fetch points within the ring whose inner and outer radii are the minimum and maximum distances respectively.
From these candidate points we take the closest one.

**Issues:**
* Crossing powerlines: There usually are powerlines crossing each other and this method might fail by picking a point in supposed to be the other powerline.
* Lines joining at a substations: Powerlines usually join at a substation and this method might fail by picking points from other powerlines, similar to the _Crossing powerlines_ issue.
* Parallel lines: Powerlines tend to run parallel for quite some distance from a substation; such parallel lines fail to be inferred correctly. The inference forms a zig-zag pattern by alternately joining points from the two possible lines.

**Works best for:**
* Lone powerlines: Powerlines that run independently, without any intersections or parallel neighbours.

### Point causing less angle of deviation

Not default. Set by setting the variable `is_less_deviating_point_preferred` to `True`.

While building a line, this method will choose a point that will deviate a line with a lesser angle than all other points that are around it within a maximum distance of 0.008 units.
This maximum distance is needed so that, too far away nodes are not taken into consideration as quite often the farthest nodes create less deviation angle.
From a given point, we gather the possible angle formed by all the points within the maximum distance.
We will then round these angles up to the nearest multiple of 5 and round the distances to three decimal places.
We sort these nodes by angle and then by distance, so that all the nodes forming a noticeable deviation (a 5-degree angle) form a group.
We will then choose the closest node from the group that forms the least deviation.

The variables that control the angles' rounding multiple, the distance rounding precision, and the maximum distance for a point to be considered in the angle-deviation calculation are within the `NodesWrapper` class.

**Issues:**
* Lines joining at a substation: Such lines usually take sharp turns at substations but points from neighbouring lines might show less deviations.

**Works better for:**
* Parallel lines
* Crossing powerlines


## Variables that can work together
* Farthest points, Points on a substation, Next Closest within the ring, Point causing less deviation.

# Requirements
* Postgres with [postGIS](http://postgis.net) Version 2.2
* Python 2.7
35 changes: 35 additions & 0 deletions way_inference/cluster_wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
class ClusterWrapper:
    """Fetches spatial clusters of power-related points from the database.

    Points inside the given bounding box whose ``tags`` mention "power" are
    grouped with PostGIS ``ST_ClusterWithin``; each resulting cluster is a
    candidate powerline.
    """

    def __init__(self, sql_cur, bounds):
        """
        :param sql_cur: an open psycopg2 cursor on the GIS database.
        :param bounds: bounding box, presumably ``[min_y, min_x, max_y, max_x]``
            given how the query arguments are swapped below — TODO confirm
            against callers.
        """
        # Plain instance attributes; the previous class-level mutable
        # defaults (``bounds = []``) would have been shared across instances.
        self.bounds = bounds
        self.cur = sql_cur

    def getClusters(self):
        """Run the clustering query and return one row per cluster.

        Each row holds the cluster geometry as WKT text
        (``ST_AsText`` output).
        """
        # Debug print of the query args removed — it only cluttered stdout.
        self.cur.execute(
            self._clusters_query(),
            self._clusters_query_args()
        )
        return self.cur.fetchall()

    def _clusters_query(self):
        # TODO: Get only the points that are not part of a powerline
        # 0.1 is the clustering distance in the projection's units; value
        # derived with ../estimate_power_pole_distance.py.
        clusters_query = """
        SELECT ST_AsText(unnest((ST_ClusterWithin(geom, 0.1))))
        AS cluster_geom
        FROM point
        WHERE ST_Intersects(
        ST_MakeEnvelope(%s, %s, %s, %s),
        geom
        )
        AND properties->>'tags' LIKE '%%power%%'
        """
        return clusters_query

    def _clusters_query_args(self):
        # ST_MakeEnvelope expects (xmin, ymin, xmax, ymax); the stored
        # bounds are ordered (y, x, y, x), hence the pair swap.
        args = [self.bounds[1], self.bounds[0],
                self.bounds[3], self.bounds[2]]
        return args

162 changes: 162 additions & 0 deletions way_inference/infer_way.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import psycopg2
import json
import sys
import ast
from cluster_wrapper import ClusterWrapper
from nodes_wrapper import NodesWrapper
from ways_wrapper import WaysWrapper

# Connect once at module load; every wrapper below shares this cursor.
try:
    conn = psycopg2.connect("dbname='gis' user='Munna' host='localhost' password=''")
    cur = conn.cursor()
except psycopg2.Error as error:
    # The original bare `except` swallowed the failure and the script then
    # died later with a NameError on `cur`; report the real error and stop.
    print("I am unable to connect to the database: %s" % error)
    sys.exit(1)

# Hard-coded sample of node osmids from one known powerline.
# NOTE(review): this tuple appears to be leftover test data — it is
# overwritten per-cluster in the loop at the bottom of the file before
# any use; candidate for removal.
nodes_osmids = (
    '3212196101', '3212196097', '3212196093', '3212196089', '3212196086', '3212196083',
    '3212196077', '3212196075', '3212196071', '3212196045', '3212196012', '3212195977',
    '3212195974', '3212195967', '3212195960', '3212195952', '3212195947',
    '3212195940', '3212195935', '3212195931', '3212195926', '3212195925',
    '3212195924', '3212195923', '3212195917', '3212195908', '3212195898',
    '3212195884', '3212195874', '3212195866', '3212195869', '3212195878',
    '3212195882', '3212195889', '3212195895', '3212195893', '3212195896'
)
# Candidate bounding boxes used while experimenting; only the LAST
# uncommented assignment takes effect (Africa Mbanza below overrides the
# "short line" box).
# bounds = [10.39529800415039, 4.050234320898018, 10.50516128540039, 4.109221809610561] # parallel lines
bounds = [15.496902465820312, -1.4843615162701949, 16.375808715820312, -1.0113763068489454] # the short line in the middle of africa

# bounds = [15.003890991210938, -4.800890838853971, 15.663070678710938, -4.137558228375503] # bigger africa
# bounds = [15.232973098754881, -4.554179759718965, 15.342836380004883, -4.495226724658142]
# bounds = [15.070838928222654, -4.89223025116464, 15.510292053222656, -4.656501822101158]
# bounds = [15.22255539894104, -4.743145262934742, 15.23628830909729, -4.735778370815115]
bounds = [14.515686035156248, -5.103254918829327, 16.27349853515625, -4.160158150193397] # Africa Mbanza
# bounds = [10.853462219238281, 49.238000036465465, 11.292915344238281, 49.392206057422044] # Nuremburg

# settings
# Toggle the two optional strategies described in the README:
# least-deviation next-point selection and substation starting points.
is_less_deviating_point_preferred = True
prefer_nodes_from_substations = True

# Shared helpers; all three use the same cursor and bounding box.
nodesWrapper = NodesWrapper(cur, bounds)
clustersWrapper = ClusterWrapper(cur, bounds)
waysWrapper = WaysWrapper(cur, bounds)

clusters = clustersWrapper.getClusters()

def infer_way_from_nodes(nodes_osmids, cluster_geom_text=None):
    """Greedily chain the given nodes into a line and persist it.

    Starts from a node on a substation polygon (if enabled) or one of the
    two farthest-apart nodes, then repeatedly picks the next node through
    NodesWrapper until no candidate remains or a polygon (building /
    substation) is reached, saves the chain through WaysWrapper, and
    recurses on the leftover nodes of the cluster.

    :param nodes_osmids: tuple of node osmid strings forming one cluster.
    :param cluster_geom_text: optional cluster geometry as WKT; when given,
        the farthest-point lookup runs against the cluster geometry instead
        of the osmid list.

    NOTE(review): relies on module globals (nodesWrapper, waysWrapper,
    conn, and the two settings flags) and commits to the database as a
    side effect.
    """

    processing_node = None

    # Optionally prefer to start from a node lying on a substation polygon.
    if prefer_nodes_from_substations == True:
        nodes_on_polygons = nodesWrapper.get_node_osmids_intersecting_polygons(nodes_osmids)
        if len(nodes_on_polygons) > 0:
            processing_node = nodes_on_polygons[0][0]

    # Fall back to one of the two farthest-apart nodes.
    if processing_node is None:
        if cluster_geom_text is not None:
            farthest_nodes = nodesWrapper.get_farthest_nodes_in_cluster(cluster_geom_text)
        else:
            farthest_nodes = nodesWrapper.get_farthest_nodes_among_nodes(nodes_osmids)

        processing_node = farthest_nodes[0]

    processed_nodes = []
    ignored_nodes = []  # nodes are ignored if they are too close to a node
    possible_parallel_line_nodes = []

    processed_nodes.append(processing_node)

    is_complete = False

    while is_complete == False:
        # all nodes minus the processed nodes
        unprocessed_nodes = tuple(set(nodes_osmids) - set(tuple(processed_nodes)))

        # unprocessed nodes minus the ignored nodes.
        unprocessed_nodes = tuple(set(unprocessed_nodes) - set(tuple(ignored_nodes)))

        closest_node = None

        # The previously processed node is needed to measure deviation
        # angles; it is only meaningful once two or more nodes are chained.
        if is_less_deviating_point_preferred and len(processed_nodes) > 1 and processing_node != processed_nodes[-2]:
            last_processed_node = processed_nodes[-2]
        else:
            last_processed_node = None

        if len(unprocessed_nodes) > 0:
            nodes_around = nodesWrapper.get_closest_nodes_to(
                processing_node,
                unprocessed_nodes,
                last_processed_node
            )

            # Accumulate bookkeeping returned by the wrapper: nodes too
            # close to consider, and candidates hinting at a parallel line.
            ignored_nodes = ignored_nodes + nodes_around['too_close_node_osmids']
            possible_parallel_line_nodes = possible_parallel_line_nodes + nodes_around['possible_parallel_line_nodes']

            closest_node = None

            if is_less_deviating_point_preferred == True and len(nodes_around['angles']) > 0:
                sorted_nodes = sorted(
                    nodes_around['angles'],
                    key=lambda x: (x[2], x[1])
                )  # sort by angle and then distance

                print("From -> %s: %s" % (processing_node, sorted_nodes))

                if sorted_nodes[0][2] is not None:  # angle could sometimes be none.
                    closest_node = sorted_nodes[0][0]

            # No usable angle information: fall back to plain closest node.
            if closest_node is None:
                closest_node = nodes_around['closest_node_osmid']
        else:
            # Nothing left to chain — the line is complete.
            is_complete = True
            continue

        if closest_node is not None:
            # NOTE(review): Python 2 print statement — this module targets
            # Python 2.7 (see way_inference/README.md requirements).
            print ".",
            processing_node = closest_node
            processed_nodes.append(processing_node)

            # if the node that was just processed lies in any polygon
            # (building/substation), treat that as the end of the line.
            if waysWrapper.is_node_in_any_polygon(processing_node):
                is_complete = True
        else:
            print("\n*********** IS COMPLETE **************\n")
            is_complete = True

    if len(processed_nodes) < 1:
        # This means, we couldn't find any close nodes.
        # End the iteration of the cluster.
        return
    else:
        # Emit overpass-style node(<osmid>); lines for manual inspection.
        for node_osmid in processed_nodes:
            print("node(%s);" % node_osmid)

    inferrence_notes = {}
    # Heuristic: if a quarter or more of the chained length was flagged as
    # possibly belonging to a parallel line, record the inference as suspect.
    if len(possible_parallel_line_nodes) >= (len(processed_nodes)/4):
        inferrence_notes = {
            'possible_error': True,
            'notes': 'Posisble paralle lines'
        }
    waysWrapper.save_to_database(processed_nodes, inferrence_notes)
    conn.commit()

    # all nodes minus the processed nodes
    unprocessed_nodes = tuple(set(nodes_osmids) - set(tuple(processed_nodes)))
    # unprocessed nodes minus the ignored nodes.
    unprocessed_nodes = tuple(set(unprocessed_nodes) - set(tuple(ignored_nodes)))

    if len(unprocessed_nodes) > 1:
        # There are more nodes to be processed in this cluster.
        infer_way_from_nodes(nodes_osmids=unprocessed_nodes)

# Drive the inference: one infer_way_from_nodes run per spatial cluster.
for cluster in clusters:
    print("************ Processing New Cluster **************")
    nodes_osmids = nodesWrapper.get_nodes_osmids_in_cluster(cluster[0])
    if len(nodes_osmids) > 1:
        # NOTE(review): when substation starting points are preferred the
        # cluster geometry is NOT passed, which forces the farthest-point
        # fallback to use the osmid list instead of the cluster geometry.
        # Looks intentional, but worth confirming.
        if prefer_nodes_from_substations:
            infer_way_from_nodes(nodes_osmids)
        else:
            infer_way_from_nodes(nodes_osmids, cluster[0])

    else:
        # A single point cannot form a line.
        print("Not enough nodes in cluster! - SKIPPING")

Loading