Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions estimate_power_pole_distance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import psycopg2
import json
import sys
import ast
import numpy as np
import scipy.stats as stats
import pylab as pl

# Estimate typical power-pole spacing: for each sampled powerline, measure the
# minimum and maximum pairwise distance between its points (in the units of the
# stored spatial reference, per ST_Distance), then report the averages.

try:
    conn = psycopg2.connect("dbname='gis' user='Munna' host='localhost' password=''")
    cur = conn.cursor()
except psycopg2.Error as error:
    # Nothing below can run without a connection; fail fast instead of
    # continuing and crashing later with a NameError on `cur`
    # (the original bare `except:` swallowed the failure).
    print("I am unable to connect to the database: %s" % error)
    sys.exit(1)

min_distances = []
max_distances = []

get_powerlines_query = "SELECT id as id, properties->>'refs' as ref FROM powerline LIMIT 10"
cur.execute(get_powerlines_query)
powerlines = cur.fetchall()
for powerline in powerlines:
    # `refs` is stored as the text form of a Python list of node osmids.
    nodes_osmids = ast.literal_eval(powerline[1])
    nodes_osmids = tuple(str(i) for i in nodes_osmids)
    nodes_count_query = "SELECT count(DISTINCT id) FROM point WHERE properties->>'osmid' IN %s"
    cur.execute(nodes_count_query, (nodes_osmids, ))
    nodes_count = cur.fetchone()
    # check if we have all the nodes for a powerline in our database
    if nodes_count[0] == len(nodes_osmids):
        nodes_distances_query = '''SELECT MIN(ST_Distance(a.geom, b.geom)),
                                          MAX(ST_Distance(a.geom, b.geom))
                                   FROM
                                        point a,
                                        point b
                                   WHERE a.id IN (SELECT DISTINCT id FROM point where properties->>'osmid' in %s)
                                   AND b.id IN (SELECT DISTINCT id FROM point where properties->>'osmid' in %s)
                                   AND a.id != b.id'''
        cur.execute(nodes_distances_query, (nodes_osmids, nodes_osmids, ))
        distances = cur.fetchone()
        print(distances)
        min_distances.append(distances[0])
        max_distances.append(distances[1])

# Guard against an empty sample: none of the sampled powerlines may have had
# all of their nodes present in the database.
if min_distances:
    # sum()/len() replaces the original reduce(), which was never imported
    # (NameError on Python 3) and needlessly obscure here.
    print("Average minimum distance: ", sum(min_distances) / len(min_distances))
    print("Average maximum distance: ", sum(max_distances) / len(max_distances))
else:
    print("No sampled powerline had all of its nodes in the database.")

# Optional exploratory plotting of the distance distributions, kept for
# reference — uncomment to fit a normal curve and save histograms.

# h = sorted(min_distances)

# fit = stats.norm.pdf(h, np.mean(h), np.std(h)) #this is a fitting indeed

# f = pl.figure()
# pl.plot(h,fit,'-o')
# pl.hist(h,normed=True) #use this to draw histogram of your data
# f.savefig('min.pdf')

# h = sorted(max_distances)

# fit = stats.norm.pdf(h, np.mean(h), np.std(h)) #this is a fitting indeed

# f = pl.figure()
# pl.plot(h,fit,'-o')
# pl.hist(h,normed=True) #use this to draw histogram of your data
# f.savefig('max.pdf')
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""create inferred_powerlines table

Revision ID: 360ac2066ce
Revises: 58ba1b9ec53
Create Date: 2016-08-28 14:40:35.960705

"""

# revision identifiers, used by Alembic.
revision = '360ac2066ce'
down_revision = '58ba1b9ec53'

from alembic import op
import sqlalchemy as sa


def upgrade():
    """Create ``inferred_powerlines`` with the same layout as ``powerline``.

    ``CREATE TABLE ... (LIKE ...)`` copies columns, defaults, constraints
    and indexes, but NOT foreign keys or triggers.
    Refer: http://stackoverflow.com/a/1220942/976880
    """
    ddl = (
        "CREATE table inferred_powerlines("
        "like powerline "
        "including defaults "
        "including constraints "
        "including indexes"
        ")"
    )
    op.execute(ddl)

def downgrade():
    """Drop the table created by :func:`upgrade`."""
    op.drop_table("inferred_powerlines")
104 changes: 104 additions & 0 deletions way_inference/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Way Inference

# How does it work?

The idea was to take basic, simple methods, make them work in coordination, and demonstrate way-inference methods covering the possible scenarios.
> At the moment these scripts cannot perform very good inference. This is more a proof of possibility.


## Basic script

Takes in boundaries to run inference on that region of the map.
Gets clusters of points (discussed [here](#clustering)) and loops over each cluster. Each cluster is possibly a powerline.
For each cluster
* Start inference with a point in the cluster. How we pick a point to start with is discussed [here](#choosing-a-point-to-start-with).
* Fetch the database and choose a [possible next point](#choosing-a-possible-next-point).
* Here we have different options on how to choose a next point. Some points are even ignored during this process (discussed [here](#choosing-a-possible-next-point)).
* Once we have gone through all the points, we will save the points as a line into the database.
* Alternatively to the above point, if we encounter a point that meets a polygon (building), we assume it is the end of the line and save the line to the database.
We will then continue afresh with the remaining points from the cluster, treating them as a new cluster.

As simple as that!

## Clustering

Everything basically starts with clustering.
Given a boundary, we take clusters of points in that region from our database.
We take the points that are not marked as substations or stations or something similar.
We cluster points based on the distance between them.
For the powerlines we had in the database (at least), we calculated (using the `../estimate_power_pole_distance.py` script) that the average maximum distance between two points in a powerline is around 0.1 in postGIS distance units (projected units based on spatial ref - [See more](http://postgis.net/docs/ST_Distance.html)).
So, we cluster points that are in that distance of each other (at least with another point in that cluster).
You could see the `ClusterWrapper` for the code related to this.

## Choosing a point to start with

Within a cluster, we choose the points based on different methods.

### Farthest points

By default, we choose one of the two farthest points.
The distances are calculated and farthest points are fetched from SQL for performance leveraging the [postGIS](http://postgis.net) functions.
The script then takes one of those points as a starting point.

### Points on a substation
**Optional**

Some polygons are tagged with `power: substation` in the OpenStreetMap data to mark substations.
Also, ways that are tagged with `power: line` but are closed are usually substations as well.
If the option to select such points is enabled in the script, the script fetches points (from the cluster) that intersect or lie within such polygons.
This again uses the postGIS functions, and such points are fetched using SQL queries.
The script then takes one of those points as a starting point.
If no such points are found, it falls back to using **Farthest points**.

## Choosing a possible next point

This is one of the critical steps which would make a difference on how the lines are inferred.
A possible next point from a point is done based on different variables.
We have tested these variables separately. They cannot all work together.
However, a few of them can work together.

### Closest point in a ring

By default, we choose the next closest point in a ring.
It is highly possible that a point has neighbours that are too close (in the case of powerlines running in parallel).
We don't want to join those _too-close_ points.
Similarly, there are always points around a point that are too far away.
We used the `../estimate_power_pole_distance.py` script to find the average minimum and maximum distance between adjacent points among the powerlines in our database.
Using these minimum and maximum distances, we set the variables in the `NodeWrapper` and form an SQL query to fetch points within the ring whose inner and outer radii are the minimum and maximum distances respectively.
From these candidate points we take the closest one.

**Issues:**
* Crossing powerlines: There usually are powerlines crossing each other and this method might fail by picking a point in supposed to be the other powerline.
* Lines joining at a substations: Powerlines usually join at a substation and this method might fail by picking points from other powerlines, similar to the _Crossing powerlines_ issue.
* Parallel lines: Powerlines tend to run parallel for quite some distance from a substation; such parallel lines fail to be inferred correctly. The inference forms a zig-zag pattern by alternately joining points from the two possible lines.

**Works best for:**
* Lone powerlines: Powerlines that run independently, without any intersections or parallel neighbours.

### Point causing less angle of deviation

Not default. Set by setting the variable `is_less_deviating_point_preferred` to `True`.

While building a line, this method will choose a point that will deviate a line with a lesser angle than all other points that are around it within a maximum distance of 0.008 units.
This maximum distance is needed so that, too far away nodes are not taken into consideration as quite often the farthest nodes create less deviation angle.
From a given point, we gather the possible angle formed by all the points within the maximum distance.
We will then round these angles up to the nearest multiple of 5 and round the distances to three decimal places.
We sort these nodes by angle and then by distance, so that all the nodes forming a noticeable deviation (a 5-degree angle) form a group.
We will then choose the closest node from the group that forms the least deviation.

The variables that control the angles' rounding multiple, the distance rounding precision, and the maximum distance for a point to be considered in the angle-deviation calculation are within the `NodesWrapper` class.

**Issues:**
* Lines joining at a substation: Such lines usually take sharp turns at substations but points from neighbouring lines might show less deviations.

**Works better for:**
* Parallel lines
* Crossing powerlines


## Variables that can work together
* Farthest points, Points on a substation, Next Closest within the ring, Point causing less deviation.

# Requirements
* Postgres with [postGIS](http://postgis.net) Version 2.2
* Python 2.7
35 changes: 35 additions & 0 deletions way_inference/cluster_wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
class ClusterWrapper:
    """Fetches spatial clusters of power-related points from the database.

    Points inside the given bounding box whose ``tags`` mention "power" are
    grouped with PostGIS ``ST_ClusterWithin``; each resulting cluster is a
    candidate powerline.
    """

    def __init__(self, sql_cur, bounds):
        """
        :param sql_cur: an open psycopg2 cursor on the GIS database.
        :param bounds: bounding box, presumably ``[min_y, min_x, max_y, max_x]``
            given how the query arguments are swapped below — TODO confirm
            against callers.
        """
        # Plain instance attributes; the previous class-level mutable
        # defaults (``bounds = []``) would have been shared across instances.
        self.bounds = bounds
        self.cur = sql_cur

    def getClusters(self):
        """Run the clustering query and return one row per cluster.

        Each row holds the cluster geometry as WKT text
        (``ST_AsText`` output).
        """
        # Debug print of the query args removed — it only cluttered stdout.
        self.cur.execute(
            self._clusters_query(),
            self._clusters_query_args()
        )
        return self.cur.fetchall()

    def _clusters_query(self):
        # TODO: Get only the points that are not part of a powerline
        # 0.1 is the clustering distance in the projection's units; value
        # derived with ../estimate_power_pole_distance.py.
        clusters_query = """
        SELECT ST_AsText(unnest((ST_ClusterWithin(geom, 0.1))))
        AS cluster_geom
        FROM point
        WHERE ST_Intersects(
        ST_MakeEnvelope(%s, %s, %s, %s),
        geom
        )
        AND properties->>'tags' LIKE '%%power%%'
        """
        return clusters_query

    def _clusters_query_args(self):
        # ST_MakeEnvelope expects (xmin, ymin, xmax, ymax); the stored
        # bounds are ordered (y, x, y, x), hence the pair swap.
        args = [self.bounds[1], self.bounds[0],
                self.bounds[3], self.bounds[2]]
        return args

162 changes: 162 additions & 0 deletions way_inference/infer_way.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import psycopg2
import json
import sys
import ast
from cluster_wrapper import ClusterWrapper
from nodes_wrapper import NodesWrapper
from ways_wrapper import WaysWrapper

# Connect once at module load; every wrapper below shares this cursor.
try:
    conn = psycopg2.connect("dbname='gis' user='Munna' host='localhost' password=''")
    cur = conn.cursor()
except psycopg2.Error as error:
    # The original bare `except` swallowed the failure and the script then
    # died later with a NameError on `cur`; report the real error and stop.
    print("I am unable to connect to the database: %s" % error)
    sys.exit(1)

# Hard-coded sample of node osmids from one known powerline.
# NOTE(review): this tuple appears to be leftover test data — it is
# overwritten per-cluster in the loop at the bottom of the file before
# any use; candidate for removal.
nodes_osmids = (
    '3212196101', '3212196097', '3212196093', '3212196089', '3212196086', '3212196083',
    '3212196077', '3212196075', '3212196071', '3212196045', '3212196012', '3212195977',
    '3212195974', '3212195967', '3212195960', '3212195952', '3212195947',
    '3212195940', '3212195935', '3212195931', '3212195926', '3212195925',
    '3212195924', '3212195923', '3212195917', '3212195908', '3212195898',
    '3212195884', '3212195874', '3212195866', '3212195869', '3212195878',
    '3212195882', '3212195889', '3212195895', '3212195893', '3212195896'
)
# Candidate bounding boxes used while experimenting; only the LAST
# uncommented assignment takes effect (Africa Mbanza below overrides the
# "short line" box).
# bounds = [10.39529800415039, 4.050234320898018, 10.50516128540039, 4.109221809610561] # parallel lines
bounds = [15.496902465820312, -1.4843615162701949, 16.375808715820312, -1.0113763068489454] # the short line in the middle of africa

# bounds = [15.003890991210938, -4.800890838853971, 15.663070678710938, -4.137558228375503] # bigger africa
# bounds = [15.232973098754881, -4.554179759718965, 15.342836380004883, -4.495226724658142]
# bounds = [15.070838928222654, -4.89223025116464, 15.510292053222656, -4.656501822101158]
# bounds = [15.22255539894104, -4.743145262934742, 15.23628830909729, -4.735778370815115]
bounds = [14.515686035156248, -5.103254918829327, 16.27349853515625, -4.160158150193397] # Africa Mbanza
# bounds = [10.853462219238281, 49.238000036465465, 11.292915344238281, 49.392206057422044] # Nuremburg

# settings
# Toggle the two optional strategies described in the README:
# least-deviation next-point selection and substation starting points.
is_less_deviating_point_preferred = True
prefer_nodes_from_substations = True

# Shared helpers; all three use the same cursor and bounding box.
nodesWrapper = NodesWrapper(cur, bounds)
clustersWrapper = ClusterWrapper(cur, bounds)
waysWrapper = WaysWrapper(cur, bounds)

clusters = clustersWrapper.getClusters()

def infer_way_from_nodes(nodes_osmids, cluster_geom_text=None):
    """Greedily chain the given nodes into a line and persist it.

    Starts from a node on a substation polygon (if enabled) or one of the
    two farthest-apart nodes, then repeatedly picks the next node through
    NodesWrapper until no candidate remains or a polygon (building /
    substation) is reached, saves the chain through WaysWrapper, and
    recurses on the leftover nodes of the cluster.

    :param nodes_osmids: tuple of node osmid strings forming one cluster.
    :param cluster_geom_text: optional cluster geometry as WKT; when given,
        the farthest-point lookup runs against the cluster geometry instead
        of the osmid list.

    NOTE(review): relies on module globals (nodesWrapper, waysWrapper,
    conn, and the two settings flags) and commits to the database as a
    side effect.
    """

    processing_node = None

    # Optionally prefer to start from a node lying on a substation polygon.
    if prefer_nodes_from_substations == True:
        nodes_on_polygons = nodesWrapper.get_node_osmids_intersecting_polygons(nodes_osmids)
        if len(nodes_on_polygons) > 0:
            processing_node = nodes_on_polygons[0][0]

    # Fall back to one of the two farthest-apart nodes.
    if processing_node is None:
        if cluster_geom_text is not None:
            farthest_nodes = nodesWrapper.get_farthest_nodes_in_cluster(cluster_geom_text)
        else:
            farthest_nodes = nodesWrapper.get_farthest_nodes_among_nodes(nodes_osmids)

        processing_node = farthest_nodes[0]

    processed_nodes = []
    ignored_nodes = []  # nodes are ignored if they are too close to a node
    possible_parallel_line_nodes = []

    processed_nodes.append(processing_node)

    is_complete = False

    while is_complete == False:
        # all nodes minus the processed nodes
        unprocessed_nodes = tuple(set(nodes_osmids) - set(tuple(processed_nodes)))

        # unprocessed nodes minus the ignored nodes.
        unprocessed_nodes = tuple(set(unprocessed_nodes) - set(tuple(ignored_nodes)))

        closest_node = None

        # The previously processed node is needed to measure deviation
        # angles; it is only meaningful once two or more nodes are chained.
        if is_less_deviating_point_preferred and len(processed_nodes) > 1 and processing_node != processed_nodes[-2]:
            last_processed_node = processed_nodes[-2]
        else:
            last_processed_node = None

        if len(unprocessed_nodes) > 0:
            nodes_around = nodesWrapper.get_closest_nodes_to(
                processing_node,
                unprocessed_nodes,
                last_processed_node
            )

            # Accumulate bookkeeping returned by the wrapper: nodes too
            # close to consider, and candidates hinting at a parallel line.
            ignored_nodes = ignored_nodes + nodes_around['too_close_node_osmids']
            possible_parallel_line_nodes = possible_parallel_line_nodes + nodes_around['possible_parallel_line_nodes']

            closest_node = None

            if is_less_deviating_point_preferred == True and len(nodes_around['angles']) > 0:
                sorted_nodes = sorted(
                    nodes_around['angles'],
                    key=lambda x: (x[2], x[1])
                )  # sort by angle and then distance

                print("From -> %s: %s" % (processing_node, sorted_nodes))

                if sorted_nodes[0][2] is not None:  # angle could sometimes be none.
                    closest_node = sorted_nodes[0][0]

            # No usable angle information: fall back to plain closest node.
            if closest_node is None:
                closest_node = nodes_around['closest_node_osmid']
        else:
            # Nothing left to chain — the line is complete.
            is_complete = True
            continue

        if closest_node is not None:
            # NOTE(review): Python 2 print statement — this module targets
            # Python 2.7 (see way_inference/README.md requirements).
            print ".",
            processing_node = closest_node
            processed_nodes.append(processing_node)

            # if the node that was just processed lies in any polygon
            # (building/substation), treat that as the end of the line.
            if waysWrapper.is_node_in_any_polygon(processing_node):
                is_complete = True
        else:
            print("\n*********** IS COMPLETE **************\n")
            is_complete = True

    if len(processed_nodes) < 1:
        # This means, we couldn't find any close nodes.
        # End the iteration of the cluster.
        return
    else:
        # Emit overpass-style node(<osmid>); lines for manual inspection.
        for node_osmid in processed_nodes:
            print("node(%s);" % node_osmid)

    inferrence_notes = {}
    # Heuristic: if a quarter or more of the chained length was flagged as
    # possibly belonging to a parallel line, record the inference as suspect.
    if len(possible_parallel_line_nodes) >= (len(processed_nodes)/4):
        inferrence_notes = {
            'possible_error': True,
            'notes': 'Posisble paralle lines'
        }
    waysWrapper.save_to_database(processed_nodes, inferrence_notes)
    conn.commit()

    # all nodes minus the processed nodes
    unprocessed_nodes = tuple(set(nodes_osmids) - set(tuple(processed_nodes)))
    # unprocessed nodes minus the ignored nodes.
    unprocessed_nodes = tuple(set(unprocessed_nodes) - set(tuple(ignored_nodes)))

    if len(unprocessed_nodes) > 1:
        # There are more nodes to be processed in this cluster.
        infer_way_from_nodes(nodes_osmids=unprocessed_nodes)

# Drive the inference: one infer_way_from_nodes run per spatial cluster.
for cluster in clusters:
    print("************ Processing New Cluster **************")
    nodes_osmids = nodesWrapper.get_nodes_osmids_in_cluster(cluster[0])
    if len(nodes_osmids) > 1:
        # NOTE(review): when substation starting points are preferred the
        # cluster geometry is NOT passed, which forces the farthest-point
        # fallback to use the osmid list instead of the cluster geometry.
        # Looks intentional, but worth confirming.
        if prefer_nodes_from_substations:
            infer_way_from_nodes(nodes_osmids)
        else:
            infer_way_from_nodes(nodes_osmids, cluster[0])

    else:
        # A single point cannot form a line.
        print("Not enough nodes in cluster! - SKIPPING")

Loading