From ad7094269b765004b8f7a6899ef9876aa644d865 Mon Sep 17 00:00:00 2001 From: travis dewolf Date: Thu, 14 Apr 2016 18:05:31 -0400 Subject: [PATCH 01/10] Initial draft of risk aware controllers - learning isn't working properly --- risk_aware_control/riskaware.py | 159 ++++++++++++++++++ risk_aware_control/riskaware_learning.py | 201 +++++++++++++++++++++++ 2 files changed, 360 insertions(+) create mode 100644 risk_aware_control/riskaware.py create mode 100644 risk_aware_control/riskaware_learning.py diff --git a/risk_aware_control/riskaware.py b/risk_aware_control/riskaware.py new file mode 100644 index 0000000..8305b3d --- /dev/null +++ b/risk_aware_control/riskaware.py @@ -0,0 +1,159 @@ +import numpy as np +import seaborn +import matplotlib.pyplot as plt +from matplotlib import animation + +class Runner: + + def __init__(self): + self.num_states = 400 + self.domain = np.linspace(-10, 10, self.num_states) + + self.num_systems = 3 + self.x = np.zeros(self.num_systems) + self.var = [3, 1, .5] + # the initial state probability + self.px = np.vstack([self.make_gauss() for ii in range(self.num_systems)]) + + self.drift = -.15 # systems drifts left + self.highlander_mode = True # allow more than one action at a time? + + # have a controller that pushes us left + # self.u = np.array([1, -1, 5, -5, 10, -10]) + self.u = np.array([0, .5, -.5]) + self.L = [] + for u in self.u: + offset = int(u / 20.0 * 400.0) + self.L.append( + # moves away from current state + np.diag(np.ones(self.num_states)) * -1 + + # moves into state + u + np.diag(np.ones(self.num_states-abs(offset)), -offset)) + + # also need a cost function (Gaussian to move towards the center) + self.make_v() + + self.track_position = [] + self.track_target = [] + + def make_gauss(self, mean=0, var=.5): + return np.exp(-(self.domain-mean)**2 / (2*var**2)) + + def make_v(self, mean=0): + self.v = self.make_gauss(mean=mean,var=2) + self.make_gauss(mean=mean,var=.01) + self.v = self.v * 5 - 1 + self.v[np.where(self.v > 0)] = 1.0 + + def physics(self, u): + for ii in range(self.num_systems): + self.x[ii] += (self.drift + u[ii]) # simple physics + self.px[ii] = self.make_gauss(self.x[ii], self.var[ii]) + self.px[ii] /= np.max(self.px[ii]) + # clip at zero and normalize px + self.px[ii][np.where(self.px[ii] < 0)] = 0.0 + + def anim_init(self): + self.v_line.set_data([], []) + self.px_line0.set_data([], []) + self.px_line1.set_data([], []) + self.px_line2.set_data([], []) + plt.legend(['value function', 'np.dot(Li, p(x))']) + return self.v_line, self.px_line0, self.px_line1, self.px_line2 + + def anim_animate(self, i): + + # calculate the weights for the actions + self.wu = np.zeros((self.num_systems, len(self.u))) + for ii, Li in enumerate(self.L): + for jj in range(self.num_systems): + if self.highlander_mode is False: + self.wu[jj,ii] = min(1, + max(0, np.dot(self.v, np.dot(Li, self.px[jj])))) + else: + # don't clip it here so we can tell the actual winner + self.wu[jj,ii] = np.dot(self.v, np.dot(Li, self.px[jj])) + # select the strongest action + if self.highlander_mode is True: + for ii in range(self.num_systems): + index = self.wu[ii].argmax() + val = self.wu[ii, index] + self.wu[ii] = np.zeros(len(self.u)) + # now clip it + self.wu[ii,index] = min(1, val) + + print self.wu + + # track information for plotting + self.track_position.append(np.copy(self.x)) + # get edges of value function + road = np.where(self.v == 1) + self.track_target.append(np.array([self.domain[road[0][0]], # left edge + self.domain[road[0][-1]]])) # right edge + # 
simulate dynamics and get new state + self.physics(np.dot(self.wu, self.u)) + # move the target around slowly + self.make_v(np.sin(i*.1)*5) + + + self.px_line0.set_data(range(self.num_states), self.px[0]) + self.px_line1.set_data(range(self.num_states), self.px[1]) + self.px_line2.set_data(range(self.num_states), self.px[2]) + self.v_line.set_data(range(self.num_states), self.v) + + return self.v_line, self.px_line0, self.px_line1, self.px_line2 + + def run(self): + fig = plt.figure() + ax = fig.add_subplot(111) + self.v_line, = ax.plot([],[], color='r', lw=3) + self.px_line0, = ax.plot([],[], color='k', lw=3) + self.px_line1, = ax.plot([],[], color='k', lw=3) + self.px_line2, = ax.plot([],[], color='k', lw=3) + + plt.xlim([0, self.num_states-1]) + plt.xticks(np.linspace(0, self.num_states, 11), np.linspace(-10, 10, 11)) + plt.ylim([-1, 1]) + + anim = animation.FuncAnimation(fig, self.anim_animate, + init_func=self.anim_init, frames=500, + interval=100, blit=True) + plt.show() + +if __name__ == '__main__': + + runner = Runner() + runner.run() + + # generate some nice plots + fig = plt.figure(figsize=(8, 8)) + runner.track_position = np.array(runner.track_position) + time = runner.track_position.shape[0] + X = np.arange(0, time) + Y = runner.domain + X, Y = np.meshgrid(X, Y) + for ii in range(runner.num_systems): + # plot borders, and path and heatmap for system ii + plt.subplot(runner.num_systems+1, 1, ii+1) + plt.plot(runner.track_position[:,ii], 'b', lw=5) # plot actual position of each system + plt.plot(runner.track_target, 'r--', lw=3) # plot road boundaries + # plot a heat map showing sensor information + heatmap = np.zeros((runner.num_states, time)) + for jj in range(time): + heatmap[:,jj] = runner.make_gauss(mean=runner.track_position[jj, ii], + var=runner.var[ii]) + plt.pcolormesh(X, Y, heatmap, cmap='terrain_r') + + + plt.title('Variance = %.2f'%runner.var[ii]) + plt.xlim([0, time-1]) + + # plot the borders and path of each + plt.subplot(runner.num_systems+1, 1, 4) + plt.plot(runner.track_position[:,ii], lw=3) # plot actual position of each system + plt.plot(runner.track_target, 'r--', lw=3) # plot road boundaries + plt.xlim([0, time-1]) + plt.legend(runner.var, frameon=True, bbox_to_anchor=[1, 1.05]) + plt.xlabel('Time') + + plt.tight_layout() + plt.show() diff --git a/risk_aware_control/riskaware_learning.py b/risk_aware_control/riskaware_learning.py new file mode 100644 index 0000000..9048c1f --- /dev/null +++ b/risk_aware_control/riskaware_learning.py @@ -0,0 +1,201 @@ +import numpy as np +import seaborn +import matplotlib.pyplot as plt +from matplotlib import animation + +class Runner: + + def __init__(self): + self.num_states = 400 + self.limit = 10 + self.domain = np.linspace(-self.limit, + self.limit, + self.num_states) + + self.x = 0 # initial position + self.var = .5 # variance in sensory information + # the initial state probability + self.px = self.make_gauss() + + self.drift = 0#-.15 # constant slide of the system in this direction + + # action set + self.u = np.array([0, .5, -.5]) + # for learning, start with no knowledge about actions + self.L = [(np.random.random((self.num_states,self.num_states))*2-1)*1e-1 \ + for ii in range(len(self.u))] + # for ii in range(len(self.u)): + # np.fill_diagonal(self.L[ii], -1*np.ones(self.num_states)) + + # create some more for plotting later + self.L_init = np.copy(self.L) + self.L_actual = [np.zeros((self.num_states, self.num_states)) \ + for ii in range(len(self.u))] + for ii in range(len(self.u)): + for jj in 
range(self.num_states): + self.x = self.domain[jj] + px_old = self.make_gauss(mean=self.x) + px_old /= np.max(px_old) + px_old[np.where(px_old < 0)] = 0.0 + self.physics(self.u[ii]) + self.L_actual[ii][jj] = np.copy(self.px - px_old) + + # offset = int((u - self.drift)/ 20.0 * 400.0) + # self.L_actual.append( + # # moves away from current state + # np.diag(np.ones(self.num_states)) * -1 + + # # moves into state + u + # np.diag(np.ones(self.num_states-abs(offset)), -offset)) + self.x = 0 # initial position + + self.gamma = 1e-1 # learning rate + + # also need a cost function (Gaussian to move towards the center) + self.make_v() + + self.track_position = [] + self.track_target = [] + + def make_gauss(self, mean=0, var=.5): + return np.exp(-(self.domain-mean)**2 / (2*var**2)) + + def make_v(self, mean=0): + self.v = self.make_gauss(mean=mean,var=2) + self.make_gauss(mean=mean,var=.01) + self.v = self.v * 2 - 1 + self.v[np.where(self.v > 0)] = 1.0 + + def physics(self, u): + self.x += (self.drift + u) # simple physics + self.x = min(self.limit, max(-self.limit, self.x)) + self.px = self.make_gauss(self.x, self.var) + self.px /= np.max(self.px) + # clip at zero and normalize px + self.px[np.where(self.px < 0)] = 0.0 + + def anim_init(self): + self.v_line.set_data([], []) + self.px_line.set_data([], []) + self.Lpx_line0.set_data([], []) + self.Lpx_line1.set_data([], []) + self.Lpx_line2.set_data([], []) + plt.legend(['value function', 'px', 'L0*px', 'L1*px', 'L2*px']) + return self.v_line, self.px_line, self.Lpx_line0, self.Lpx_line1, self.Lpx_line2 + + def anim_animate(self, i): + + # calculate the weights for the actions + self.wu = np.zeros(len(self.u)) + for ii, Li in enumerate(self.L): + # don't clip it here so we can tell the actual winner + self.wu[ii] = np.dot(self.v, np.dot(Li, self.px)) + + # set losers to zero (there can be only one action selected) + index = self.wu.argmax() + # add in some exploration + if int(np.random.random()*10) == 5: + print 'here' + index = np.random.choice(range(3)) + val = self.wu[index] + self.wu = np.zeros(len(self.u)) + # now clip it + self.wu[index] = 1# min(1, val) + # store the predicted change in probability + self.dpx_estimate = np.dot(self.L[index], self.px) + # also store the old px for calculating dpx + self.old_px = np.copy(self.px) + + # track information for plotting + self.track_position.append(np.copy(self.x)) + # get edges of value function + road = np.where(self.v == 1) + self.track_target.append(np.array([self.domain[road[0][0]], # left edge + self.domain[road[0][-1]]])) # right edge + + # simulate dynamics and get new state + self.physics(np.dot(self.wu, self.u)) + # move the target around slowly + self.make_v(np.sin(i*.01)*9) + + # do our learning + err = (self.px - self.old_px) - self.dpx_estimate + learn = self.gamma * np.outer(err, self.px) # learning_rate * err * activities + self.L[index] += learn + + self.px_line.set_data(range(self.num_states), self.px) + self.Lpx_line0.set_data(range(self.num_states), np.dot(self.L[0], self.px)) + self.Lpx_line1.set_data(range(self.num_states), np.dot(self.L[1], self.px)) + self.Lpx_line2.set_data(range(self.num_states), np.dot(self.L[2], self.px)) + self.v_line.set_data(range(self.num_states), self.v) + + return self.v_line, self.px_line, self.Lpx_line0, self.Lpx_line1, self.Lpx_line2 + + def run(self): + fig = plt.figure() + ax = fig.add_subplot(111) + self.v_line, = ax.plot([],[], color='r', lw=3) + self.px_line, = ax.plot([],[], color='k', lw=3) + self.Lpx_line0, = ax.plot([],[], color='b', 
lw=3) + self.Lpx_line1, = ax.plot([],[], color='g', lw=3) + self.Lpx_line2, = ax.plot([],[], color='y', lw=3) + + plt.xlim([0, self.num_states-1]) + plt.xticks(np.linspace(0, self.num_states, 11), np.linspace(-10, 10, 11)) + plt.ylim([-1, 1]) + + anim = animation.FuncAnimation(fig, self.anim_animate, + init_func=self.anim_init, frames=10000, + interval=0, blit=True) + plt.show() + +if __name__ == '__main__': + + runner = Runner() + runner.run() + + # generate some nice plots + axes = [] + X, Y = np.meshgrid(runner.domain, runner.domain) + plt.figure(figsize=(9,9)) + for ii in range(len(runner.u)): + # axes.append(plt.subplot(1,3,ii)) + # plt.axis('equal') + # runner.L[ii][0,0] = 1 + # runner.L[ii][0,1] = -1 + # plt.pcolormesh(X, Y, runner.L[ii])#, cmap='terrain_r') + + # plot the starting vs ending vs actual L operators + # plot a heat map showing sensor information + axes.append(plt.subplot(4, len(runner.u), ii+1)) + plt.axis('equal') + runner.L_init[ii][0,0] = 1 + runner.L_init[ii][0,1] = -1 + plt.pcolormesh(X, Y, runner.L_init[ii]) + + axes.append(plt.subplot(4, len(runner.u), ii+1+len(runner.u))) + runner.L[ii][0,0] = 1 + runner.L[ii][0,1] = -1 + plt.axis('equal') + plt.pcolormesh(X, Y, runner.L[ii]) + + axes.append(plt.subplot(4, len(runner.u), (ii+1+2*len(runner.u)))) + plt.axis('equal') + runner.L_actual[ii][0,0] = 1 + runner.L_actual[ii][0,1] = -1 + plt.pcolormesh(X, Y, runner.L_actual[ii]) + + axes.append(plt.subplot(4, len(runner.u), (ii+1+3*len(runner.u)))) + plt.axis('equal') + diff = runner.L[ii] - runner.L_actual[ii] + diff[0,0] = 1 + diff[0,1] = -1 + plt.pcolormesh(X, Y, diff) + + print np.diag(runner.L[1]) + print np.diag(runner.L_actual[1]) + + axes[0].set_ylabel('Initial L operator') + axes[1].set_ylabel('Learned L operator') + axes[2].set_ylabel('Actual L operator') + plt.suptitle('Learning L operators') + # plt.tight_layout() + plt.show() From 452e8fe4709545b038440ddaca18bbdc0bd48211 Mon Sep 17 00:00:00 2001 From: travis dewolf Date: Fri, 15 Apr 2016 10:53:17 -0400 Subject: [PATCH 02/10] update to ra --- risk_aware_control/riskaware.py | 14 +++++++++----- risk_aware_control/riskaware_learning.py | 24 ++++++++++-------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/risk_aware_control/riskaware.py b/risk_aware_control/riskaware.py index 8305b3d..b56fec6 100644 --- a/risk_aware_control/riskaware.py +++ b/risk_aware_control/riskaware.py @@ -7,7 +7,10 @@ class Runner: def __init__(self): self.num_states = 400 - self.domain = np.linspace(-10, 10, self.num_states) + self.limit = 10 + self.domain = np.linspace(-self.limit, + self.limit, + self.num_states) self.num_systems = 3 self.x = np.zeros(self.num_systems) @@ -16,11 +19,11 @@ def __init__(self): self.px = np.vstack([self.make_gauss() for ii in range(self.num_systems)]) self.drift = -.15 # systems drifts left - self.highlander_mode = True # allow more than one action at a time? + self.highlander_mode = False # allow more than one action at a time? 
- # have a controller that pushes us left - # self.u = np.array([1, -1, 5, -5, 10, -10]) + # action set self.u = np.array([0, .5, -.5]) + self.L = [] for u in self.u: offset = int(u / 20.0 * 400.0) @@ -40,7 +43,8 @@ def make_gauss(self, mean=0, var=.5): return np.exp(-(self.domain-mean)**2 / (2*var**2)) def make_v(self, mean=0): - self.v = self.make_gauss(mean=mean,var=2) + self.make_gauss(mean=mean,var=.01) + self.v = self.make_gauss(mean=mean,var=2) + \ + self.make_gauss(mean=mean,var=.01) self.v = self.v * 5 - 1 self.v[np.where(self.v > 0)] = 1.0 diff --git a/risk_aware_control/riskaware_learning.py b/risk_aware_control/riskaware_learning.py index 9048c1f..fd438d0 100644 --- a/risk_aware_control/riskaware_learning.py +++ b/risk_aware_control/riskaware_learning.py @@ -24,8 +24,6 @@ def __init__(self): # for learning, start with no knowledge about actions self.L = [(np.random.random((self.num_states,self.num_states))*2-1)*1e-1 \ for ii in range(len(self.u))] - # for ii in range(len(self.u)): - # np.fill_diagonal(self.L[ii], -1*np.ones(self.num_states)) # create some more for plotting later self.L_init = np.copy(self.L) @@ -33,19 +31,15 @@ def __init__(self): for ii in range(len(self.u))] for ii in range(len(self.u)): for jj in range(self.num_states): + # set the system state self.x = self.domain[jj] + # measure the probability distribution of x px_old = self.make_gauss(mean=self.x) - px_old /= np.max(px_old) - px_old[np.where(px_old < 0)] = 0.0 + # apply the control signal self.physics(self.u[ii]) - self.L_actual[ii][jj] = np.copy(self.px - px_old) - - # offset = int((u - self.drift)/ 20.0 * 400.0) - # self.L_actual.append( - # # moves away from current state - # np.diag(np.ones(self.num_states)) * -1 + - # # moves into state + u - # np.diag(np.ones(self.num_states-abs(offset)), -offset)) + # calculate the change in the probability distribution + self.L_actual[ii][:,jj] = np.copy(self.px - px_old) + self.x = 0 # initial position self.gamma = 1e-1 # learning rate @@ -60,7 +54,8 @@ def make_gauss(self, mean=0, var=.5): return np.exp(-(self.domain-mean)**2 / (2*var**2)) def make_v(self, mean=0): - self.v = self.make_gauss(mean=mean,var=2) + self.make_gauss(mean=mean,var=.01) + self.v = self.make_gauss(mean=mean,var=2) + \ + self.make_gauss(mean=mean,var=.01) self.v = self.v * 2 - 1 self.v[np.where(self.v > 0)] = 1.0 @@ -93,7 +88,6 @@ def anim_animate(self, i): index = self.wu.argmax() # add in some exploration if int(np.random.random()*10) == 5: - print 'here' index = np.random.choice(range(3)) val = self.wu[index] self.wu = np.zeros(len(self.u)) @@ -121,6 +115,7 @@ def anim_animate(self, i): learn = self.gamma * np.outer(err, self.px) # learning_rate * err * activities self.L[index] += learn + # update the line plots self.px_line.set_data(range(self.num_states), self.px) self.Lpx_line0.set_data(range(self.num_states), np.dot(self.L[0], self.px)) self.Lpx_line1.set_data(range(self.num_states), np.dot(self.L[1], self.px)) @@ -196,6 +191,7 @@ def run(self): axes[0].set_ylabel('Initial L operator') axes[1].set_ylabel('Learned L operator') axes[2].set_ylabel('Actual L operator') + axes[3].set_ylabel('Difference') plt.suptitle('Learning L operators') # plt.tight_layout() plt.show() From 7af17f8042dc5e653cc3b7427da0f86f51ca3fd0 Mon Sep 17 00:00:00 2001 From: travis dewolf Date: Tue, 10 May 2016 20:33:19 -0400 Subject: [PATCH 03/10] moved riskaware.py to using gen_px --- risk_aware_control/riskaware.py | 53 ++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 11 deletions(-) diff 
--git a/risk_aware_control/riskaware.py b/risk_aware_control/riskaware.py index b56fec6..7c55f35 100644 --- a/risk_aware_control/riskaware.py +++ b/risk_aware_control/riskaware.py @@ -8,6 +8,7 @@ class Runner: def __init__(self): self.num_states = 400 self.limit = 10 + self.dx = self.limit * 2.0 / self.num_states self.domain = np.linspace(-self.limit, self.limit, self.num_states) @@ -24,6 +25,22 @@ def __init__(self): # action set self.u = np.array([0, .5, -.5]) + # self.L = [np.zeros((self.num_states, self.num_states)) \ + # for ii in range(len(self.u))] + # for ii in range(len(self.u)): + # for jj in range(self.num_states): + # # set the system state + # self.x = self.domain[jj] + # # get the probability distribution of x + # old_px = self.gen_px() + # # apply the control signal + # self.physics(self.u[ii]) + # # get the new probability distribution of x + # px = self.gen_px() + # # calculate the change in the probability distribution + # # self.L_actual[ii][jj] = np.copy(px_old - px) + # self.L_actual[ii][:,jj] = np.copy(px - old_px) + self.L = [] for u in self.u: offset = int(u / 20.0 * 400.0) @@ -51,10 +68,22 @@ def make_v(self, mean=0): def physics(self, u): for ii in range(self.num_systems): self.x[ii] += (self.drift + u[ii]) # simple physics - self.px[ii] = self.make_gauss(self.x[ii], self.var[ii]) - self.px[ii] /= np.max(self.px[ii]) - # clip at zero and normalize px - self.px[ii][np.where(self.px[ii] < 0)] = 0.0 + self.x[ii] = min(self.limit, max(-self.limit, self.x[ii])) + + def gen_px(self, x=None, var=None): + x = np.copy(self.x) if x is None else x + var = self.var if var is None else var + + px = np.zeros((self.num_systems, self.num_states)) + # TODO: rewrite this to avoid the for loop just using matrices + for ii in range(self.num_systems): + px[ii] = self.make_gauss(x[ii], var[ii]) + # make sure no negative values + px[ii][np.where(px[ii] < 0)] = 0.0 + # make sure things sum to 1 + px[ii] /= np.max(np.sum(px[ii]) * self.dx) + return px + def anim_init(self): self.v_line.set_data([], []) @@ -70,12 +99,13 @@ def anim_animate(self, i): self.wu = np.zeros((self.num_systems, len(self.u))) for ii, Li in enumerate(self.L): for jj in range(self.num_systems): - if self.highlander_mode is False: - self.wu[jj,ii] = min(1, - max(0, np.dot(self.v, np.dot(Li, self.px[jj])))) - else: + # check to see if there can be only one + if self.highlander_mode is True: # don't clip it here so we can tell the actual winner self.wu[jj,ii] = np.dot(self.v, np.dot(Li, self.px[jj])) + else: + self.wu[jj,ii] = min(1, + max(0, np.dot(self.v, np.dot(Li, self.px[jj])))) # select the strongest action if self.highlander_mode is True: for ii in range(self.num_systems): @@ -85,16 +115,17 @@ def anim_animate(self, i): # now clip it self.wu[ii,index] = min(1, val) - print self.wu - # track information for plotting self.track_position.append(np.copy(self.x)) # get edges of value function road = np.where(self.v == 1) self.track_target.append(np.array([self.domain[road[0][0]], # left edge self.domain[road[0][-1]]])) # right edge - # simulate dynamics and get new state + # apply the control signal, simulate dynamics and get new state self.physics(np.dot(self.wu, self.u)) + # get the new probability distribution of x + self.px = self.gen_px() + # move the target around slowly self.make_v(np.sin(i*.1)*5) From cf31068cd087bc26b658b557b253e8b5254a7fdf Mon Sep 17 00:00:00 2001 From: travis dewolf Date: Tue, 10 May 2016 20:33:58 -0400 Subject: [PATCH 04/10] riskaware_learning working properly --- 
risk_aware_control/riskaware_learning.py | 57 +++++++++++++++--------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/risk_aware_control/riskaware_learning.py b/risk_aware_control/riskaware_learning.py index fd438d0..d22ce6a 100644 --- a/risk_aware_control/riskaware_learning.py +++ b/risk_aware_control/riskaware_learning.py @@ -6,21 +6,25 @@ class Runner: def __init__(self): - self.num_states = 400 - self.limit = 10 + self.num_states = 200 + self.limit = 20 + self.dx = self.limit * 2.0 / self.num_states self.domain = np.linspace(-self.limit, self.limit, self.num_states) self.x = 0 # initial position - self.var = .5 # variance in sensory information - # the initial state probability - self.px = self.make_gauss() + self.var = .4 # variance in sensory information + self.px = self.make_gauss() # the initial state probability - self.drift = 0#-.15 # constant slide of the system in this direction + # constant slide of the system in this direction + self.drift = 1.5 + + # how often should the system randomly choose an action (0-1) + self.exploration = 0.0 # action set - self.u = np.array([0, .5, -.5]) + self.u = np.array([0, .5, -1, 3, -5]) # for learning, start with no knowledge about actions self.L = [(np.random.random((self.num_states,self.num_states))*2-1)*1e-1 \ for ii in range(len(self.u))] @@ -33,15 +37,18 @@ def __init__(self): for jj in range(self.num_states): # set the system state self.x = self.domain[jj] - # measure the probability distribution of x - px_old = self.make_gauss(mean=self.x) + # get the probability distribution of x + old_px = self.gen_px() # apply the control signal self.physics(self.u[ii]) + # get the new probability distribution of x + px = self.gen_px() # calculate the change in the probability distribution - self.L_actual[ii][:,jj] = np.copy(self.px - px_old) + # self.L_actual[ii][jj] = np.copy(px_old - px) + self.L_actual[ii][:,jj] = np.copy(px - old_px) + # self.L[ii] = np.copy(self.L_actual[ii]) self.x = 0 # initial position - self.gamma = 1e-1 # learning rate # also need a cost function (Gaussian to move towards the center) @@ -60,12 +67,19 @@ def make_v(self, mean=0): self.v[np.where(self.v > 0)] = 1.0 def physics(self, u): - self.x += (self.drift + u) # simple physics + self.x += self.drift + u # simple physics self.x = min(self.limit, max(-self.limit, self.x)) - self.px = self.make_gauss(self.x, self.var) - self.px /= np.max(self.px) - # clip at zero and normalize px - self.px[np.where(self.px < 0)] = 0.0 + + def gen_px(self, x=None, var=None): + x = np.copy(self.x) if x is None else x + var = self.var if var is None else var + + px = self.make_gauss(x, var) + # make sure no negative values + px[np.where(px < 0)] = 0.0 + # make sure things sum to 1 + px /= np.max(np.sum(px) * self.dx) + return px def anim_init(self): self.v_line.set_data([], []) @@ -107,12 +121,16 @@ def anim_animate(self, i): # simulate dynamics and get new state self.physics(np.dot(self.wu, self.u)) + # generate the new px + self.px = self.gen_px() # move the target around slowly - self.make_v(np.sin(i*.01)*9) + self.make_v(np.sin(i*.01)*(self.limit-1)) # do our learning err = (self.px - self.old_px) - self.dpx_estimate - learn = self.gamma * np.outer(err, self.px) # learning_rate * err * activities + # NOTE: need to use old_px because that's what our + # dpx_estimate was calculated for + learn = self.gamma * np.outer(err, self.old_px) # learning_rate * err * activities self.L[index] += learn # update the line plots @@ -185,9 +203,6 @@ def run(self): diff[0,1] = -1 
plt.pcolormesh(X, Y, diff) - print np.diag(runner.L[1]) - print np.diag(runner.L_actual[1]) - axes[0].set_ylabel('Initial L operator') axes[1].set_ylabel('Learned L operator') axes[2].set_ylabel('Actual L operator') From 24b84550a0244dd645c45dcbdbcc5e2272e77ee2 Mon Sep 17 00:00:00 2001 From: travis dewolf Date: Fri, 20 May 2016 10:11:08 -0400 Subject: [PATCH 05/10] reworked riskaware control, sample to gen L, better plots, weight selection --- risk_aware_control/riskaware.py | 191 +++++++++----- risk_aware_control/riskaware_learning.py | 2 - .../riskaware_learning_multiple.py | 216 ++++++++++++++++ risk_aware_control/riskaware_neurons.py | 242 ++++++++++++++++++ 4 files changed, 580 insertions(+), 71 deletions(-) create mode 100644 risk_aware_control/riskaware_learning_multiple.py create mode 100644 risk_aware_control/riskaware_neurons.py diff --git a/risk_aware_control/riskaware.py b/risk_aware_control/riskaware.py index 7c55f35..036cd22 100644 --- a/risk_aware_control/riskaware.py +++ b/risk_aware_control/riskaware.py @@ -6,64 +6,57 @@ class Runner: def __init__(self): - self.num_states = 400 + self.num_states = 200 self.limit = 10 self.dx = self.limit * 2.0 / self.num_states self.domain = np.linspace(-self.limit, self.limit, - self.num_states) + self.num_states)[:,None] self.num_systems = 3 - self.x = np.zeros(self.num_systems) self.var = [3, 1, .5] - # the initial state probability - self.px = np.vstack([self.make_gauss() for ii in range(self.num_systems)]) - self.drift = -.15 # systems drifts left + self.drift = 0#-.15 # systems drifts left self.highlander_mode = False # allow more than one action at a time? # action set self.u = np.array([0, .5, -.5]) - # self.L = [np.zeros((self.num_states, self.num_states)) \ - # for ii in range(len(self.u))] - # for ii in range(len(self.u)): - # for jj in range(self.num_states): - # # set the system state - # self.x = self.domain[jj] - # # get the probability distribution of x - # old_px = self.gen_px() - # # apply the control signal - # self.physics(self.u[ii]) - # # get the new probability distribution of x - # px = self.gen_px() - # # calculate the change in the probability distribution - # # self.L_actual[ii][jj] = np.copy(px_old - px) - # self.L_actual[ii][:,jj] = np.copy(px - old_px) + self.L = np.zeros((len(self.var), + len(self.u), self.num_states, self.num_states)) + for ii in range(len(self.u)): + for jj in range(self.num_states): + # set the system state + self.x = np.ones(self.num_states) * self.domain[jj] + # get the probability distribution of x + old_px = self.gen_px() + # apply the control signal + self.physics(np.ones(self.num_systems) * self.u[ii]) + # get the new probability distribution of x + px = self.gen_px() + # calculate the change in the probability distribution + self.L[:, ii, :, jj] = np.copy(px - old_px) - self.L = [] - for u in self.u: - offset = int(u / 20.0 * 400.0) - self.L.append( - # moves away from current state - np.diag(np.ones(self.num_states)) * -1 + - # moves into state + u - np.diag(np.ones(self.num_states-abs(offset)), -offset)) + # the initial state probability + self.x = np.zeros(self.num_systems) + self.px = self.gen_px() # also need a cost function (Gaussian to move towards the center) self.make_v() self.track_position = [] - self.track_target = [] + self.track_target1 = [] + self.track_target2 = [] def make_gauss(self, mean=0, var=.5): - return np.exp(-(self.domain-mean)**2 / (2*var**2)) + return np.exp(-(self.domain-mean)**2 / (2.0*var**2)) def make_v(self, mean=0): - self.v = 
self.make_gauss(mean=mean,var=2) + \ - self.make_gauss(mean=mean,var=.01) - self.v = self.v * 5 - 1 - self.v[np.where(self.v > 0)] = 1.0 + # set up the road + self.v = self.make_gauss(mean=mean,var=2) * 5 - 1 + self.v[np.where(self.v > .5)] = .5 + # make a preference for being in the right lane + self.v += self.make_gauss(mean=mean+2,var=.6) def physics(self, u): for ii in range(self.num_systems): @@ -77,7 +70,7 @@ def gen_px(self, x=None, var=None): px = np.zeros((self.num_systems, self.num_states)) # TODO: rewrite this to avoid the for loop just using matrices for ii in range(self.num_systems): - px[ii] = self.make_gauss(x[ii], var[ii]) + px[ii] = self.make_gauss(x[ii], var[ii]).flatten() # make sure no negative values px[ii][np.where(px[ii] < 0)] = 0.0 # make sure things sum to 1 @@ -97,30 +90,42 @@ def anim_animate(self, i): # calculate the weights for the actions self.wu = np.zeros((self.num_systems, len(self.u))) - for ii, Li in enumerate(self.L): - for jj in range(self.num_systems): - # check to see if there can be only one - if self.highlander_mode is True: - # don't clip it here so we can tell the actual winner - self.wu[jj,ii] = np.dot(self.v, np.dot(Li, self.px[jj])) - else: - self.wu[jj,ii] = min(1, - max(0, np.dot(self.v, np.dot(Li, self.px[jj])))) - # select the strongest action - if self.highlander_mode is True: - for ii in range(self.num_systems): - index = self.wu[ii].argmax() - val = self.wu[ii, index] - self.wu[ii] = np.zeros(len(self.u)) - # now clip it - self.wu[ii,index] = min(1, val) + # for ii in range(len(self.u)): + # for jj in range(self.num_systems): + # # check to see if there can be only one + # if self.highlander_mode is True: + # # don't clip it here so we can tell the actual winner + # self.wu[jj,ii] = np.dot(self.v, np.dot(self.L[:, ii], self.px[jj])) + # else: + # self.wu[jj,ii] = min(1, + # max(0, np.dot(self.v.T, np.dot(self.L[jj,ii], self.px[jj])))) + + for ii in range(self.num_systems): + for jj in range(len(self.u)): + # constrain so that you can only weight actions positively + self.wu[ii,jj] = max(0, np.dot(self.v.T, np.dot(self.L[ii,jj], self.px[ii]))) + if np.sum(self.wu[ii]) != 0: + # constrain so that total output power sum_j u_j**2 = 1 + self.wu[ii] /= np.sqrt(np.sum(self.wu[ii]**2)) + + # # select the strongest action + # if self.highlander_mode is True: + # for ii in range(self.num_systems): + # index = self.wu[ii].argmax() + # val = self.wu[ii, index] + # self.wu[ii] = np.zeros(len(self.u)) + # # now clip it + # self.wu[ii,index] = min(1, val) # track information for plotting self.track_position.append(np.copy(self.x)) # get edges of value function - road = np.where(self.v == 1) - self.track_target.append(np.array([self.domain[road[0][0]], # left edge + road = np.where(self.v >= .5) + self.track_target1.append(np.array([self.domain[road[0][0]], # left edge self.domain[road[0][-1]]])) # right edge + lane = np.where(self.v >= .6) + self.track_target2.append(np.array([self.domain[lane[0][0]], # left edge + self.domain[lane[0][-1]]])) # right edge # apply the control signal, simulate dynamics and get new state self.physics(np.dot(self.wu, self.u)) # get the new probability distribution of x @@ -129,7 +134,6 @@ def anim_animate(self, i): # move the target around slowly self.make_v(np.sin(i*.1)*5) - self.px_line0.set_data(range(self.num_states), self.px[0]) self.px_line1.set_data(range(self.num_states), self.px[1]) self.px_line2.set_data(range(self.num_states), self.px[2]) @@ -152,6 +156,7 @@ def run(self): anim = animation.FuncAnimation(fig, 
self.anim_animate, init_func=self.anim_init, frames=500, interval=100, blit=True) + plt.show() if __name__ == '__main__': @@ -161,33 +166,81 @@ def run(self): # generate some nice plots fig = plt.figure(figsize=(8, 8)) + + # do some scaling to plot the same as seaborn heat plots + fix_for_plot = lambda x: (np.array(x).squeeze() / + runner.limit / -2.0 + .5) * runner.num_states + track_target1 = fix_for_plot(runner.track_target1) + track_target2 = fix_for_plot(runner.track_target2) + track_position = fix_for_plot(runner.track_position) runner.track_position = np.array(runner.track_position) - time = runner.track_position.shape[0] + + time = track_position.shape[0] X = np.arange(0, time) Y = runner.domain X, Y = np.meshgrid(X, Y) + + plt.subplot(runner.num_systems+2, 1, 1) + plt.title('Position on road') + + plt.subplot(runner.num_systems+2, 1, 4) + plt.fill_between(range(track_target2.shape[0]), + track_target2[:,0], track_target2[:,1], facecolor='orange', alpha=.25) + plt.fill_between(range(track_target1.shape[0]), + track_target1[:,0], track_target1[:,1], facecolor='y', alpha=.25) + + for ii in range(runner.num_systems): # plot borders, and path and heatmap for system ii - plt.subplot(runner.num_systems+1, 1, ii+1) - plt.plot(runner.track_position[:,ii], 'b', lw=5) # plot actual position of each system - plt.plot(runner.track_target, 'r--', lw=3) # plot road boundaries + plt.subplot(runner.num_systems+2, 1, ii+1) + # plot road boundaries + plt.plot(track_target1, 'r--', lw=5) + # plot a heat map showing sensor information heatmap = np.zeros((runner.num_states, time)) for jj in range(time): - heatmap[:,jj] = runner.make_gauss(mean=runner.track_position[jj, ii], - var=runner.var[ii]) - plt.pcolormesh(X, Y, heatmap, cmap='terrain_r') - - - plt.title('Variance = %.2f'%runner.var[ii]) + heatmap[:,jj] = runner.make_gauss( + mean=runner.track_position[jj, ii], + var=runner.var[ii]).flatten() + seaborn.heatmap(heatmap, xticklabels=False, yticklabels=False, + cbar=False, cmap='Blues') + + # plot filled in zones of desirability, first the road + plt.fill_between(range(track_target1.shape[0]), + track_target1[:,0], track_target1[:,1], + facecolor='y', alpha=.25) + # and now the lane + plt.fill_between(range(track_target2.shape[0]), + track_target2[:,0], track_target2[:,1], + facecolor='orange', alpha=.25) + + # plot actual position of each system + line, = plt.plot(track_position[:,ii], 'k', lw=3) + + plt.legend([line], ['Variance = %.2f'%runner.var[ii]], + frameon=True, bbox_to_anchor=[1,1.05]) plt.xlim([0, time-1]) + plt.ylabel('Position') # plot the borders and path of each - plt.subplot(runner.num_systems+1, 1, 4) - plt.plot(runner.track_position[:,ii], lw=3) # plot actual position of each system - plt.plot(runner.track_target, 'r--', lw=3) # plot road boundaries + ax = plt.subplot(runner.num_systems+2, 1, 4) + + plt.plot(track_position[:,ii], lw=3) # plot actual position of each system + plt.xlim([0, time-1]) plt.legend(runner.var, frameon=True, bbox_to_anchor=[1, 1.05]) + plt.ylabel('Position') + ax.set_xticklabels([]) + ax.set_yticklabels([]) + + plt.subplot(runner.num_systems+2, 1, 5) + target_center = np.sin(X[0]*.1)*5 + 2 + plt.plot((runner.track_position[:,0] - target_center)**2, lw=2) + plt.plot((runner.track_position[:,1] - target_center)**2, lw=2) + plt.plot((runner.track_position[:,2] - target_center)**2, lw=2) + plt.legend(runner.var, frameon=True, bbox_to_anchor=[1, 1.05]) + plt.title('Distance from center of lane') + plt.ylabel('Squared error') plt.xlabel('Time') plt.tight_layout() 
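As a reading aid between the two file diffs in this patch (not part of the patch itself): the riskaware.py changes above replace winner-take-all selection with a soft weighting, where each action j is scored by v^T L_j p(x), negative scores are clipped to zero, and the weights are rescaled so the summed squared weights equal one. A minimal standalone sketch of that rule, with toy sizes and random operators standing in for the script's L matrices:

# Sketch only: score actions by v^T L_j p(x), clip negatives, normalize output power.
# The sizes and random L_j below are illustrative assumptions, not the repository's values.
import numpy as np

num_states, num_actions = 10, 3
rng = np.random.RandomState(0)

v = rng.randn(num_states)                             # value function over states
L = rng.randn(num_actions, num_states, num_states)    # one operator per action
px = rng.rand(num_states)
px /= np.sum(px)                                      # state probability distribution

wu = np.array([v.dot(L[j].dot(px)) for j in range(num_actions)])
wu = np.maximum(0, wu)                                # only weight actions positively
if np.sum(wu) != 0:
    wu /= np.sqrt(np.sum(wu ** 2))                    # constrain total output power: sum_j wu_j**2 = 1

print(wu)                                             # blended action weights

Because the weights are only power-normalized rather than forced to a single winner, several actions can blend, which is consistent with highlander_mode being removed in the next patch.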
diff --git a/risk_aware_control/riskaware_learning.py b/risk_aware_control/riskaware_learning.py index d22ce6a..7a21473 100644 --- a/risk_aware_control/riskaware_learning.py +++ b/risk_aware_control/riskaware_learning.py @@ -44,9 +44,7 @@ def __init__(self): # get the new probability distribution of x px = self.gen_px() # calculate the change in the probability distribution - # self.L_actual[ii][jj] = np.copy(px_old - px) self.L_actual[ii][:,jj] = np.copy(px - old_px) - # self.L[ii] = np.copy(self.L_actual[ii]) self.x = 0 # initial position self.gamma = 1e-1 # learning rate diff --git a/risk_aware_control/riskaware_learning_multiple.py b/risk_aware_control/riskaware_learning_multiple.py new file mode 100644 index 0000000..31b8c13 --- /dev/null +++ b/risk_aware_control/riskaware_learning_multiple.py @@ -0,0 +1,216 @@ +import numpy as np +import seaborn +import matplotlib.pyplot as plt +from matplotlib import animation + +class Runner: + """ The goal of this script is to be able to learn + the L operators of several actions simultaneously, + such that you don't have to choose only one action + at a time during training. """ + + def __init__(self): + self.num_states = 200 + self.limit = 20 + self.dx = self.limit * 2.0 / self.num_states + self.domain = np.linspace(-self.limit, + self.limit, + self.num_states) + + self.x = 0 # initial position + self.var = .4 # variance in sensory information + # the initial state probability + self.px = self.make_gauss() + + self.drift = 1.5 # constant slide of the system in this direction + + # action set + self.u = np.array([0, .5, -1, 3, -5]) + # for learning, start with no knowledge about actions + self.L = [(np.random.random((self.num_states,self.num_states))*2-1)*1e-1 \ + for ii in range(len(self.u))] + + # create some more for plotting later + self.L_init = np.copy(self.L) + self.L_actual = [np.zeros((self.num_states, self.num_states)) \ + for ii in range(len(self.u))] + for ii in range(len(self.u)): + for jj in range(self.num_states): + # set the system state + self.x = self.domain[jj] + # get the probability distribution of x + old_px = self.gen_px() + # apply the control signal + self.physics(self.u[ii]) + # get the new probability distribution of x + px = self.gen_px() + # calculate the change in the probability distribution + # self.L_actual[ii][jj] = np.copy(px_old - px) + self.L_actual[ii][:,jj] = np.copy(px - old_px) + # self.L[ii] = np.copy(self.L_actual[ii]) + + self.x = 0 # initial position + self.gamma = 1e-1 # learning rate + + # also need a cost function (Gaussian to move towards the center) + self.make_v() + + self.track_position = [] + self.track_target = [] + + def make_gauss(self, mean=0, var=.5): + return np.exp(-(self.domain-mean)**2 / (2*var**2)) + + def make_v(self, mean=0): + self.v = self.make_gauss(mean=mean,var=2) + \ + self.make_gauss(mean=mean,var=.01) + self.v = self.v * 2 - 1 + self.v[np.where(self.v > 0)] = 1.0 + + def physics(self, u): + self.x += self.drift + u # simple physics + self.x = min(self.limit, max(-self.limit, self.x)) + + def gen_px(self, x=None, var=None, noise=False): + x = np.copy(self.x) if x is None else x + var = self.var if var is None else var + + if noise is True: + x += int(np.random.random() * 2) + x = min(self.limit, max(-self.limit, x)) + px = self.make_gauss(x, var) + # make sure no negative values + px[np.where(px < 0)] = 0.0 + # make sure things sum to 1 + px /= np.max(np.sum(px) * self.dx) + return px + + def anim_init(self): + self.v_line.set_data([], []) + self.px_line.set_data([], []) + 
self.Lpx_line0.set_data([], []) + self.Lpx_line1.set_data([], []) + self.Lpx_line2.set_data([], []) + plt.legend(['value function', 'px', 'L0*px', 'L1*px', 'L2*px']) + return self.v_line, self.px_line, self.Lpx_line0, self.Lpx_line1, self.Lpx_line2 + + def anim_animate(self, i): + + # calculate the weights for the actions + self.wu = np.zeros(len(self.u)) + for ii, Li in enumerate(self.L): + # don't clip it here so we can tell the actual winner + self.wu[ii] = np.dot(self.v, np.dot(Li, self.px)) + + # set losers to zero (there can be only one action selected) + index = self.wu.argmax() + # add in some exploration + if int(np.random.random()*10) == 5: + index = np.random.choice(range(3)) + val = self.wu[index] + self.wu = np.zeros(len(self.u)) + # now clip it + self.wu[index] = 1# min(1, val) + # store the predicted change in probability + self.dpx_estimate = np.dot(self.L[index], self.px) + # also store the old px for calculating dpx + self.old_px = np.copy(self.px) + + # track information for plotting + self.track_position.append(np.copy(self.x)) + # get edges of value function + road = np.where(self.v == 1) + self.track_target.append(np.array([self.domain[road[0][0]], # left edge + self.domain[road[0][-1]]])) # right edge + + # simulate dynamics and get new state + self.physics(np.dot(self.wu, self.u)) + # generate the new px + self.px = self.gen_px(noise=False) + # move the target around slowly + self.make_v(np.sin(i*.01)*(self.limit-1)) + + # do our learning + err = (self.px - self.old_px) - self.dpx_estimate + # NOTE: need to use old_px because that's what our + # dpx_estimate was calculated for + learn = self.gamma * np.outer(err, self.old_px) # learning_rate * err * activities + self.L[index] += learn + + # update the line plots + self.px_line.set_data(range(self.num_states), self.px) + self.Lpx_line0.set_data(range(self.num_states), np.dot(self.L[0], self.px)) + self.Lpx_line1.set_data(range(self.num_states), np.dot(self.L[1], self.px)) + self.Lpx_line2.set_data(range(self.num_states), np.dot(self.L[2], self.px)) + self.v_line.set_data(range(self.num_states), self.v) + + return self.v_line, self.px_line, self.Lpx_line0, self.Lpx_line1, self.Lpx_line2 + + def run(self): + fig = plt.figure() + ax = fig.add_subplot(111) + self.v_line, = ax.plot([],[], color='r', lw=3) + self.px_line, = ax.plot([],[], color='k', lw=3) + self.Lpx_line0, = ax.plot([],[], color='b', lw=3) + self.Lpx_line1, = ax.plot([],[], color='g', lw=3) + self.Lpx_line2, = ax.plot([],[], color='y', lw=3) + + plt.xlim([0, self.num_states-1]) + plt.xticks(np.linspace(0, self.num_states, 11), np.linspace(-10, 10, 11)) + plt.ylim([-1, 1]) + + anim = animation.FuncAnimation(fig, self.anim_animate, + init_func=self.anim_init, frames=10000, + interval=0, blit=True) + plt.show() + +if __name__ == '__main__': + + runner = Runner() + runner.run() + + # generate some nice plots + axes = [] + X, Y = np.meshgrid(runner.domain, runner.domain) + plt.figure(figsize=(9,9)) + for ii in range(len(runner.u)): + # axes.append(plt.subplot(1,3,ii)) + # plt.axis('equal') + # runner.L[ii][0,0] = 1 + # runner.L[ii][0,1] = -1 + # plt.pcolormesh(X, Y, runner.L[ii])#, cmap='terrain_r') + + # plot the starting vs ending vs actual L operators + # plot a heat map showing sensor information + axes.append(plt.subplot(4, len(runner.u), ii+1)) + plt.axis('equal') + runner.L_init[ii][0,0] = 1 + runner.L_init[ii][0,1] = -1 + plt.pcolormesh(X, Y, runner.L_init[ii]) + + axes.append(plt.subplot(4, len(runner.u), ii+1+len(runner.u))) + runner.L[ii][0,0] = 1 + 
runner.L[ii][0,1] = -1 + plt.axis('equal') + plt.pcolormesh(X, Y, runner.L[ii]) + + axes.append(plt.subplot(4, len(runner.u), (ii+1+2*len(runner.u)))) + plt.axis('equal') + runner.L_actual[ii][0,0] = 1 + runner.L_actual[ii][0,1] = -1 + plt.pcolormesh(X, Y, runner.L_actual[ii]) + + axes.append(plt.subplot(4, len(runner.u), (ii+1+3*len(runner.u)))) + plt.axis('equal') + diff = runner.L[ii] - runner.L_actual[ii] + diff[0,0] = 1 + diff[0,1] = -1 + plt.pcolormesh(X, Y, diff) + + axes[0].set_ylabel('Initial L operator') + axes[1].set_ylabel('Learned L operator') + axes[2].set_ylabel('Actual L operator') + axes[3].set_ylabel('Difference') + plt.suptitle('Learning L operators') + # plt.tight_layout() + plt.show() diff --git a/risk_aware_control/riskaware_neurons.py b/risk_aware_control/riskaware_neurons.py new file mode 100644 index 0000000..d066178 --- /dev/null +++ b/risk_aware_control/riskaware_neurons.py @@ -0,0 +1,242 @@ +import numpy as np +import seaborn +import matplotlib.pyplot as plt +from matplotlib import animation + +class Runner: + """ In this class we're hooking up 200 neurons to the 'car', + so that half of them drive the car to the left, and half of + them drive the car to the right. The risk-aware system needs + to control the car through these neurons. """ + + def __init__(self): + self.num_states = 200 + self.limit = 20 + self.dx = self.limit * 2.0 / self.num_states + self.domain = np.linspace(-self.limit, + self.limit, + self.num_states) + + self.x = 0 # initial position + self.var = .4 # variance in sensory information + # the initial state probability + self.px = self.make_gauss() + + self.drift = 1.5 # constant slide of the system in this direction + + # action set + import nengo + model = nengo.Network() + # I think that the way for this to work is for the input + # to these neurons to be something like direct stimulation, + # and relatively strong, so projecting in either 10 or 0 + # as the two options...at least initially. + # TODO: train up a system where the input can be a range of values + # NOTE: this would possibly just be a whole other set of actions... + # so maybe the way to think about it is that activating each individual + # neuron is an action, and so is not activating each neuron. And then + # if you want to be able to input a range of values that's just more actions. + with model: + ensemble = nengo.Ensemble(n_neurons=200, dimensions=1) # dimensions is irrelevant + + # TODO: is the input here supposed to be direct neural stimulation? 
+ def get_input(t): + return self.u + node_in = nengo.Node(output=get_input, size_out=ensemblen_neurons) + # send the input to directly stimulate each neuron + nengo.Connection(node_in, ensemble.neurons) + + def set_output(t, x): + self.neuron_activity = np.copy(x) + node_out = nengo.Node(output=set_output, size_in=1) + + # half the neurons push to the left (negative weight) + # half the neurons push to the right (positive weight) + weights = np.random.random((200,1)) * 10.0 - 5.0 + nengo.Connection(ensemble, node_out, + transform=weights) + # TODO: add in some noise + + # for learning, start with no knowledge about actions + self.L = [(np.random.random((self.num_states,self.num_states))*2-1)*1e-1 \ + for ii in range(len(self.u))] + + # create some more for plotting later + self.L_init = np.copy(self.L) + self.L_actual = [np.zeros((self.num_states, self.num_states)) \ + for ii in range(len(self.u))] + for ii in range(len(self.u)): + for jj in range(self.num_states): + # set the system state + self.x = self.domain[jj] + # get the probability distribution of x + old_px = self.gen_px() + # apply the control signal + self.physics(self.u[ii]) + # get the new probability distribution of x + px = self.gen_px() + # calculate the change in the probability distribution + # self.L_actual[ii][jj] = np.copy(px_old - px) + self.L_actual[ii][:,jj] = np.copy(px - old_px) + # self.L[ii] = np.copy(self.L_actual[ii]) + + self.x = 0 # initial position + self.gamma = 1e-1 # learning rate + + # also need a cost function (Gaussian to move towards the center) + self.make_v() + + self.track_position = [] + self.track_target = [] + + def make_gauss(self, mean=0, var=.5): + return np.exp(-(self.domain-mean)**2 / (2*var**2)) + + def make_v(self, mean=0): + self.v = self.make_gauss(mean=mean,var=2) + \ + self.make_gauss(mean=mean,var=.01) + self.v = self.v * 2 - 1 + self.v[np.where(self.v > 0)] = 1.0 + + def physics(self, u): + self.x += self.drift + u # simple physics + self.x = min(self.limit, max(-self.limit, self.x)) + + def gen_px(self, x=None, var=None, noise=False): + x = np.copy(self.x) if x is None else x + var = self.var if var is None else var + + if noise is True: + x += int(np.random.random() * 2) + x = min(self.limit, max(-self.limit, x)) + px = self.make_gauss(x, var) + # make sure no negative values + px[np.where(px < 0)] = 0.0 + # make sure things sum to 1 + px /= np.max(np.sum(px) * self.dx) + return px + + def anim_init(self): + self.v_line.set_data([], []) + self.px_line.set_data([], []) + self.Lpx_line0.set_data([], []) + self.Lpx_line1.set_data([], []) + self.Lpx_line2.set_data([], []) + plt.legend(['value function', 'px', 'L0*px', 'L1*px', 'L2*px']) + return self.v_line, self.px_line, self.Lpx_line0, self.Lpx_line1, self.Lpx_line2 + + def anim_animate(self, i): + + # calculate the weights for the actions + self.wu = np.zeros(len(self.u)) + for ii, Li in enumerate(self.L): + # don't clip it here so we can tell the actual winner + self.wu[ii] = np.dot(self.v, np.dot(Li, self.px)) + + # set losers to zero (there can be only one action selected) + index = self.wu.argmax() + # add in some exploration + if int(np.random.random()*10) == 5: + index = np.random.choice(range(3)) + val = self.wu[index] + self.wu = np.zeros(len(self.u)) + # now clip it + self.wu[index] = 1# min(1, val) + # store the predicted change in probability + self.dpx_estimate = np.dot(self.L[index], self.px) + # also store the old px for calculating dpx + self.old_px = np.copy(self.px) + + # track information for plotting + 
self.track_position.append(np.copy(self.x)) + # get edges of value function + road = np.where(self.v == 1) + self.track_target.append(np.array([self.domain[road[0][0]], # left edge + self.domain[road[0][-1]]])) # right edge + + # simulate dynamics and get new state + self.physics(np.dot(self.wu, self.u)) + # generate the new px + self.px = self.gen_px(noise=False) + # move the target around slowly + self.make_v(np.sin(i*.01)*(self.limit-1)) + + # do our learning + err = (self.px - self.old_px) - self.dpx_estimate + # NOTE: need to use old_px because that's what our + # dpx_estimate was calculated for + learn = self.gamma * np.outer(err, self.old_px) # learning_rate * err * activities + self.L[index] += learn + + # update the line plots + self.px_line.set_data(range(self.num_states), self.px) + self.Lpx_line0.set_data(range(self.num_states), np.dot(self.L[0], self.px)) + self.Lpx_line1.set_data(range(self.num_states), np.dot(self.L[1], self.px)) + self.Lpx_line2.set_data(range(self.num_states), np.dot(self.L[2], self.px)) + self.v_line.set_data(range(self.num_states), self.v) + + return self.v_line, self.px_line, self.Lpx_line0, self.Lpx_line1, self.Lpx_line2 + + def run(self): + fig = plt.figure() + ax = fig.add_subplot(111) + self.v_line, = ax.plot([],[], color='r', lw=3) + self.px_line, = ax.plot([],[], color='k', lw=3) + self.Lpx_line0, = ax.plot([],[], color='b', lw=3) + self.Lpx_line1, = ax.plot([],[], color='g', lw=3) + self.Lpx_line2, = ax.plot([],[], color='y', lw=3) + + plt.xlim([0, self.num_states-1]) + plt.xticks(np.linspace(0, self.num_states, 11), np.linspace(-10, 10, 11)) + plt.ylim([-1, 1]) + + anim = animation.FuncAnimation(fig, self.anim_animate, + init_func=self.anim_init, frames=10000, + interval=0, blit=True) + plt.show() + +if __name__ == '__main__': + + runner = Runner() + runner.run() + + # generate some nice plots + axes = [] + X, Y = np.meshgrid(runner.domain, runner.domain) + plt.figure(figsize=(9,9)) + for ii in range(len(runner.u)): + + # plot the starting vs ending vs actual L operators + # plot a heat map showing sensor information + axes.append(plt.subplot(4, len(runner.u), ii+1)) + plt.axis('equal') + runner.L_init[ii][0,0] = 1 + runner.L_init[ii][0,1] = -1 + plt.pcolormesh(X, Y, runner.L_init[ii]) + + axes.append(plt.subplot(4, len(runner.u), ii+1+len(runner.u))) + runner.L[ii][0,0] = 1 + runner.L[ii][0,1] = -1 + plt.axis('equal') + plt.pcolormesh(X, Y, runner.L[ii]) + + axes.append(plt.subplot(4, len(runner.u), (ii+1+2*len(runner.u)))) + plt.axis('equal') + runner.L_actual[ii][0,0] = 1 + runner.L_actual[ii][0,1] = -1 + plt.pcolormesh(X, Y, runner.L_actual[ii]) + + axes.append(plt.subplot(4, len(runner.u), (ii+1+3*len(runner.u)))) + plt.axis('equal') + diff = runner.L[ii] - runner.L_actual[ii] + diff[0,0] = 1 + diff[0,1] = -1 + plt.pcolormesh(X, Y, diff) + + axes[0].set_ylabel('Initial L operator') + axes[1].set_ylabel('Learned L operator') + axes[2].set_ylabel('Actual L operator') + axes[3].set_ylabel('Difference') + plt.suptitle('Learning L operators') + # plt.tight_layout() + plt.show() From 52d239a85bfc25187af4f00c87c5977493614869 Mon Sep 17 00:00:00 2001 From: travis dewolf Date: Fri, 20 May 2016 10:11:37 -0400 Subject: [PATCH 06/10] removed highlander mode options in riskaware --- risk_aware_control/riskaware.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/risk_aware_control/riskaware.py b/risk_aware_control/riskaware.py index 036cd22..d17ab1d 100644 --- a/risk_aware_control/riskaware.py +++ b/risk_aware_control/riskaware.py 
@@ -17,7 +17,6 @@ def __init__(self): self.var = [3, 1, .5] self.drift = 0#-.15 # systems drifts left - self.highlander_mode = False # allow more than one action at a time? # action set self.u = np.array([0, .5, -.5]) @@ -90,15 +89,6 @@ def anim_animate(self, i): # calculate the weights for the actions self.wu = np.zeros((self.num_systems, len(self.u))) - # for ii in range(len(self.u)): - # for jj in range(self.num_systems): - # # check to see if there can be only one - # if self.highlander_mode is True: - # # don't clip it here so we can tell the actual winner - # self.wu[jj,ii] = np.dot(self.v, np.dot(self.L[:, ii], self.px[jj])) - # else: - # self.wu[jj,ii] = min(1, - # max(0, np.dot(self.v.T, np.dot(self.L[jj,ii], self.px[jj])))) for ii in range(self.num_systems): for jj in range(len(self.u)): @@ -108,15 +98,6 @@ def anim_animate(self, i): # constrain so that total output power sum_j u_j**2 = 1 self.wu[ii] /= np.sqrt(np.sum(self.wu[ii]**2)) - # # select the strongest action - # if self.highlander_mode is True: - # for ii in range(self.num_systems): - # index = self.wu[ii].argmax() - # val = self.wu[ii, index] - # self.wu[ii] = np.zeros(len(self.u)) - # # now clip it - # self.wu[ii,index] = min(1, val) - # track information for plotting self.track_position.append(np.copy(self.x)) # get edges of value function From ee70e4288a34b77f6a75c2e859569379423b6b1e Mon Sep 17 00:00:00 2001 From: travis dewolf Date: Sat, 21 May 2016 11:24:37 -0400 Subject: [PATCH 07/10] removing for loops in riskaware, using matrix and einsum --- risk_aware_control/riskaware.py | 49 ++++++++++++++++----------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/risk_aware_control/riskaware.py b/risk_aware_control/riskaware.py index d17ab1d..9c0555d 100644 --- a/risk_aware_control/riskaware.py +++ b/risk_aware_control/riskaware.py @@ -6,6 +6,7 @@ class Runner: def __init__(self): + self.num_systems = 3 self.num_states = 200 self.limit = 10 self.dx = self.limit * 2.0 / self.num_states @@ -13,20 +14,19 @@ def __init__(self): self.limit, self.num_states)[:,None] - self.num_systems = 3 - self.var = [3, 1, .5] + self.var = np.array([3, 1, .5]) self.drift = 0#-.15 # systems drifts left # action set - self.u = np.array([0, .5, -.5]) + self.u = np.array([0, .1, .5, -.5]) self.L = np.zeros((len(self.var), len(self.u), self.num_states, self.num_states)) for ii in range(len(self.u)): for jj in range(self.num_states): # set the system state - self.x = np.ones(self.num_states) * self.domain[jj] + self.x = np.ones(self.num_systems) * self.domain[jj] # get the probability distribution of x old_px = self.gen_px() # apply the control signal @@ -34,7 +34,7 @@ def __init__(self): # get the new probability distribution of x px = self.gen_px() # calculate the change in the probability distribution - self.L[:, ii, :, jj] = np.copy(px - old_px) + self.L[:, ii, :, jj] = np.copy(px - old_px).T # the initial state probability self.x = np.zeros(self.num_systems) @@ -58,22 +58,18 @@ def make_v(self, mean=0): self.v += self.make_gauss(mean=mean+2,var=.6) def physics(self, u): - for ii in range(self.num_systems): - self.x[ii] += (self.drift + u[ii]) # simple physics - self.x[ii] = min(self.limit, max(-self.limit, self.x[ii])) + self.x += (self.drift + u) # simple physics + self.x = np.minimum(self.limit, np.maximum(-self.limit, self.x)) def gen_px(self, x=None, var=None): x = np.copy(self.x) if x is None else x var = self.var if var is None else var - px = np.zeros((self.num_systems, self.num_states)) - # TODO: rewrite this to 
avoid the for loop just using matrices - for ii in range(self.num_systems): - px[ii] = self.make_gauss(x[ii], var[ii]).flatten() - # make sure no negative values - px[ii][np.where(px[ii] < 0)] = 0.0 - # make sure things sum to 1 - px[ii] /= np.max(np.sum(px[ii]) * self.dx) + px = self.make_gauss(x, var) + # make sure no negative values + px[np.where(px < 0)] = 0.0 + # make sure things sum to 1 + px /= np.sum(px, axis=0) * self.dx return px @@ -89,13 +85,16 @@ def anim_animate(self, i): # calculate the weights for the actions self.wu = np.zeros((self.num_systems, len(self.u))) - + self.wu2 = np.zeros((self.num_systems, len(self.u))) + for ii in range(self.num_systems): - for jj in range(len(self.u)): - # constrain so that you can only weight actions positively - self.wu[ii,jj] = max(0, np.dot(self.v.T, np.dot(self.L[ii,jj], self.px[ii]))) + # calculate weights for all actions simultaneously, v.T * L_i * p(x) + # constrain so that you can only weight actions positively + self.wu[ii] = np.maximum(self.wu[ii], + np.einsum('lj,ij->i', self.v.T, + np.einsum('ijk,k->ij', self.L[ii], self.px[:,ii]))) + # constrain so that total output power sum_j u_j**2 = 1 if np.sum(self.wu[ii]) != 0: - # constrain so that total output power sum_j u_j**2 = 1 self.wu[ii] /= np.sqrt(np.sum(self.wu[ii]**2)) # track information for plotting @@ -115,9 +114,9 @@ def anim_animate(self, i): # move the target around slowly self.make_v(np.sin(i*.1)*5) - self.px_line0.set_data(range(self.num_states), self.px[0]) - self.px_line1.set_data(range(self.num_states), self.px[1]) - self.px_line2.set_data(range(self.num_states), self.px[2]) + self.px_line0.set_data(range(self.num_states), self.px[:,0]) + self.px_line1.set_data(range(self.num_states), self.px[:,1]) + self.px_line2.set_data(range(self.num_states), self.px[:,2]) self.v_line.set_data(range(self.num_states), self.v) return self.v_line, self.px_line0, self.px_line1, self.px_line2 @@ -132,7 +131,7 @@ def run(self): plt.xlim([0, self.num_states-1]) plt.xticks(np.linspace(0, self.num_states, 11), np.linspace(-10, 10, 11)) - plt.ylim([-1, 1]) + plt.ylim([-1, 1.5]) anim = animation.FuncAnimation(fig, self.anim_animate, init_func=self.anim_init, frames=500, From 2516461259a6db44b4b4af78712b324df4c4d4f7 Mon Sep 17 00:00:00 2001 From: travis dewolf Date: Sat, 21 May 2016 11:33:35 -0400 Subject: [PATCH 08/10] TODO:RA control not working properly anymore --- risk_aware_control/riskaware.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/risk_aware_control/riskaware.py b/risk_aware_control/riskaware.py index 9c0555d..82e834e 100644 --- a/risk_aware_control/riskaware.py +++ b/risk_aware_control/riskaware.py @@ -3,6 +3,9 @@ import matplotlib.pyplot as plt from matplotlib import animation +#TODO: RA control isn't working as it should be, as on initial commit where +# the most variance means it stays closest to the center of the road, why! + class Runner: def __init__(self): From e39ce0914fe623c2c8be9f067b96e6afc8bd2935 Mon Sep 17 00:00:00 2001 From: travis dewolf Date: Sun, 22 May 2016 15:34:08 -0400 Subject: [PATCH 09/10] Fixed - visibly working properly Turns out there was no problem, it was just really hard to see it working because of the normalization and the long tails on the Gaussians everything stayed in the center. Set the Gaussian values < .1 = 0 and now the normalization doesn't have such a ubiquitous effect on the control, can see the variance affecting each sitch. 
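As a reading aid (not part of the commit message): the fix described above amounts to zeroing the long Gaussian tails before p(x) is normalized, so the tails no longer wash out the differences between the systems' variances. A hedged sketch of that clipping step, reusing the make_gauss form from the scripts; the 0.1 threshold comes from the diff below, while the mean and variance here are illustrative:

# Sketch of the tail clipping described above; domain size, mean, and variance are illustrative.
import numpy as np

num_states = 200
limit = 10.0
dx = 2.0 * limit / num_states
domain = np.linspace(-limit, limit, num_states)

def make_gauss(mean=0.0, var=0.5):
    return np.exp(-(domain - mean) ** 2 / (2.0 * var ** 2))

px = make_gauss(mean=3.0, var=2.0)
px[np.where(px < .1)] = 0.0      # zero the long tails (previously clipped only below 0)
px /= np.sum(px) * dx            # normalize so the distribution integrates to 1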
--- risk_aware_control/riskaware.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/risk_aware_control/riskaware.py b/risk_aware_control/riskaware.py index 82e834e..f72dfa0 100644 --- a/risk_aware_control/riskaware.py +++ b/risk_aware_control/riskaware.py @@ -17,12 +17,12 @@ def __init__(self): self.limit, self.num_states)[:,None] - self.var = np.array([3, 1, .5]) + self.var = np.array([2, 1, .5]) self.drift = 0#-.15 # systems drifts left # action set - self.u = np.array([0, .1, .5, -.5]) + self.u = np.array([0, .25, -.25, .5, -.5]) self.L = np.zeros((len(self.var), len(self.u), self.num_states, self.num_states)) @@ -42,6 +42,8 @@ def __init__(self): # the initial state probability self.x = np.zeros(self.num_systems) self.px = self.gen_px() + + self.track_lane_center = [] # also need a cost function (Gaussian to move towards the center) self.make_v() @@ -56,9 +58,10 @@ def make_gauss(self, mean=0, var=.5): def make_v(self, mean=0): # set up the road self.v = self.make_gauss(mean=mean,var=2) * 5 - 1 - self.v[np.where(self.v > .5)] = .5 + self.v[np.where(self.v > .5)] = 1 # make a preference for being in the right lane self.v += self.make_gauss(mean=mean+2,var=.6) + self.track_lane_center.append(mean+2) def physics(self, u): self.x += (self.drift + u) # simple physics @@ -70,7 +73,7 @@ def gen_px(self, x=None, var=None): px = self.make_gauss(x, var) # make sure no negative values - px[np.where(px < 0)] = 0.0 + px[np.where(px < .1)] = 0.0 # make sure things sum to 1 px /= np.sum(px, axis=0) * self.dx return px @@ -217,7 +220,7 @@ def run(self): ax.set_yticklabels([]) plt.subplot(runner.num_systems+2, 1, 5) - target_center = np.sin(X[0]*.1)*5 + 2 + target_center = np.array(runner.track_lane_center)[:-1] plt.plot((runner.track_position[:,0] - target_center)**2, lw=2) plt.plot((runner.track_position[:,1] - target_center)**2, lw=2) plt.plot((runner.track_position[:,2] - target_center)**2, lw=2) From f1afae2b9493afe9e68ad29ece242ef737ad4b64 Mon Sep 17 00:00:00 2001 From: travis dewolf Date: Sun, 29 May 2016 19:17:10 -0400 Subject: [PATCH 10/10] removed seaborn dependencies --- risk_aware_control/riskaware.py | 36 +++++++++++++-------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/risk_aware_control/riskaware.py b/risk_aware_control/riskaware.py index f72dfa0..59a96c9 100644 --- a/risk_aware_control/riskaware.py +++ b/risk_aware_control/riskaware.py @@ -1,11 +1,8 @@ import numpy as np -import seaborn +# import seaborn import matplotlib.pyplot as plt from matplotlib import animation -#TODO: RA control isn't working as it should be, as on initial commit where -# the most variance means it stays closest to the center of the road, why! 
- class Runner: def __init__(self): @@ -22,7 +19,7 @@ def __init__(self): self.drift = 0#-.15 # systems drifts left # action set - self.u = np.array([0, .25, -.25, .5, -.5]) + self.u = np.array([0, 1, -1, .5, -.5, .25, -.25]) self.L = np.zeros((len(self.var), len(self.u), self.num_states, self.num_states)) @@ -58,7 +55,7 @@ def make_gauss(self, mean=0, var=.5): def make_v(self, mean=0): # set up the road self.v = self.make_gauss(mean=mean,var=2) * 5 - 1 - self.v[np.where(self.v > .5)] = 1 + self.v[np.where(self.v > .5)] = .5 # make a preference for being in the right lane self.v += self.make_gauss(mean=mean+2,var=.6) self.track_lane_center.append(mean+2) @@ -73,7 +70,7 @@ def gen_px(self, x=None, var=None): px = self.make_gauss(x, var) # make sure no negative values - px[np.where(px < .1)] = 0.0 + px[np.where(px < 0)] = 0.0 # make sure things sum to 1 px /= np.sum(px, axis=0) * self.dx return px @@ -91,12 +88,11 @@ def anim_animate(self, i): # calculate the weights for the actions self.wu = np.zeros((self.num_systems, len(self.u))) - self.wu2 = np.zeros((self.num_systems, len(self.u))) for ii in range(self.num_systems): # calculate weights for all actions simultaneously, v.T * L_i * p(x) # constrain so that you can only weight actions positively - self.wu[ii] = np.maximum(self.wu[ii], + self.wu[ii] = np.maximum(np.ones(len(self.u)), np.einsum('lj,ij->i', self.v.T, np.einsum('ijk,k->ij', self.L[ii], self.px[:,ii]))) # constrain so that total output power sum_j u_j**2 = 1 @@ -153,12 +149,9 @@ def run(self): # generate some nice plots fig = plt.figure(figsize=(8, 8)) - # do some scaling to plot the same as seaborn heat plots - fix_for_plot = lambda x: (np.array(x).squeeze() / - runner.limit / -2.0 + .5) * runner.num_states - track_target1 = fix_for_plot(runner.track_target1) - track_target2 = fix_for_plot(runner.track_target2) - track_position = fix_for_plot(runner.track_position) + track_target1 = np.array(runner.track_target1).squeeze() + track_target2 = np.array(runner.track_target2).squeeze() + track_position = np.array(runner.track_position).squeeze() runner.track_position = np.array(runner.track_position) time = track_position.shape[0] @@ -170,10 +163,10 @@ def run(self): plt.title('Position on road') plt.subplot(runner.num_systems+2, 1, 4) - plt.fill_between(range(track_target2.shape[0]), - track_target2[:,0], track_target2[:,1], facecolor='orange', alpha=.25) plt.fill_between(range(track_target1.shape[0]), - track_target1[:,0], track_target1[:,1], facecolor='y', alpha=.25) + track_target1[:,0], track_target1[:,1], facecolor='y', alpha=.15) + plt.fill_between(range(track_target2.shape[0]), + track_target2[:,0], track_target2[:,1], facecolor='orange', alpha=.45) for ii in range(runner.num_systems): @@ -188,17 +181,16 @@ def run(self): heatmap[:,jj] = runner.make_gauss( mean=runner.track_position[jj, ii], var=runner.var[ii]).flatten() - seaborn.heatmap(heatmap, xticklabels=False, yticklabels=False, - cbar=False, cmap='Blues') + plt.pcolormesh(X, Y, heatmap, cmap='Blues') # plot filled in zones of desirability, first the road plt.fill_between(range(track_target1.shape[0]), track_target1[:,0], track_target1[:,1], - facecolor='y', alpha=.25) + facecolor='y', alpha=.15) # and now the lane plt.fill_between(range(track_target2.shape[0]), track_target2[:,0], track_target2[:,1], - facecolor='orange', alpha=.25) + facecolor='orange', alpha=.45) # plot actual position of each system line, = plt.plot(track_position[:,ii], 'k', lw=3)
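
For reference, a minimal sketch of the vectorized action weighting these patches converge on: for each action j, w_j = max(0, v . (L_j p(x))), then rescaled so the squared weights sum to 1. The shapes and the random v, L, and p(x) below are placeholders, and the repo's exact einsum strings and clipping differ slightly; this only shows that the loop and einsum forms agree, which is the point of the for-loop removal:

    import numpy as np

    # Illustrative shapes and random placeholders, not the committed values.
    num_states, num_actions = 200, 5
    v = np.random.random(num_states)             # value function over states
    L = np.random.random((num_actions, num_states, num_states)) * 2 - 1
    px = np.random.random(num_states)
    px /= np.sum(px)                             # state probability distribution

    # loop form from the earlier commits: w_j = max(0, v . (L_j p(x)))
    w_loop = np.array([max(0.0, v @ (L[j] @ px)) for j in range(num_actions)])

    # einsum form: apply every L_j to p(x) at once, then dot with v
    Lpx = np.einsum('jkl,l->jk', L, px)          # shape (num_actions, num_states)
    w_ein = np.maximum(0.0, np.einsum('k,jk->j', v, Lpx))

    assert np.allclose(w_loop, w_ein)

    # rescale so the squared weights sum to 1, guarding the all-zero case
    if np.any(w_ein):
        w_ein /= np.sqrt(np.sum(w_ein**2))
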
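
And a small self-contained sketch of the seaborn-free heatmap plotting the last patch switches to, using only matplotlib's pcolormesh; the trajectory and variance here are made up for illustration:

    import numpy as np
    import matplotlib.pyplot as plt

    # Made-up trajectory and variance, just to show the matplotlib-only heatmap.
    num_states, timesteps = 200, 100
    domain = np.linspace(-10, 10, num_states)
    positions = 5 * np.sin(np.arange(timesteps) * .1)   # stand-in trajectory
    var = 1.0

    # Gaussian sensory estimate over the domain at each timestep
    heatmap = np.zeros((num_states, timesteps))
    for t in range(timesteps):
        heatmap[:, t] = np.exp(-(domain - positions[t])**2 / (2 * var**2))

    # pcolormesh takes the place of seaborn.heatmap
    X, Y = np.meshgrid(np.arange(timesteps), domain)
    plt.pcolormesh(X, Y, heatmap, cmap='Blues')
    plt.plot(positions, 'k', lw=3)                      # position over the heatmap
    plt.xlim([0, timesteps - 1])
    plt.show()
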