From ad7094269b765004b8f7a6899ef9876aa644d865 Mon Sep 17 00:00:00 2001 From: travis dewolf Date: Thu, 14 Apr 2016 18:05:31 -0400 Subject: [PATCH 01/10] Initial draft of risk aware controllers - learning isn't working properly --- risk_aware_control/riskaware.py | 159 ++++++++++++++++++ risk_aware_control/riskaware_learning.py | 201 +++++++++++++++++++++++ 2 files changed, 360 insertions(+) create mode 100644 risk_aware_control/riskaware.py create mode 100644 risk_aware_control/riskaware_learning.py diff --git a/risk_aware_control/riskaware.py b/risk_aware_control/riskaware.py new file mode 100644 index 0000000..8305b3d --- /dev/null +++ b/risk_aware_control/riskaware.py @@ -0,0 +1,159 @@ +import numpy as np +import seaborn +import matplotlib.pyplot as plt +from matplotlib import animation + +class Runner: + + def __init__(self): + self.num_states = 400 + self.domain = np.linspace(-10, 10, self.num_states) + + self.num_systems = 3 + self.x = np.zeros(self.num_systems) + self.var = [3, 1, .5] + # the initial state probability + self.px = np.vstack([self.make_gauss() for ii in range(self.num_systems)]) + + self.drift = -.15 # systems drifts left + self.highlander_mode = True # allow more than one action at a time? + + # have a controller that pushes us left + # self.u = np.array([1, -1, 5, -5, 10, -10]) + self.u = np.array([0, .5, -.5]) + self.L = [] + for u in self.u: + offset = int(u / 20.0 * 400.0) + self.L.append( + # moves away from current state + np.diag(np.ones(self.num_states)) * -1 + + # moves into state + u + np.diag(np.ones(self.num_states-abs(offset)), -offset)) + + # also need a cost function (Gaussian to move towards the center) + self.make_v() + + self.track_position = [] + self.track_target = [] + + def make_gauss(self, mean=0, var=.5): + return np.exp(-(self.domain-mean)**2 / (2*var**2)) + + def make_v(self, mean=0): + self.v = self.make_gauss(mean=mean,var=2) + self.make_gauss(mean=mean,var=.01) + self.v = self.v * 5 - 1 + self.v[np.where(self.v > 0)] = 1.0 + + def physics(self, u): + for ii in range(self.num_systems): + self.x[ii] += (self.drift + u[ii]) # simple physics + self.px[ii] = self.make_gauss(self.x[ii], self.var[ii]) + self.px[ii] /= np.max(self.px[ii]) + # clip at zero and normalize px + self.px[ii][np.where(self.px[ii] < 0)] = 0.0 + + def anim_init(self): + self.v_line.set_data([], []) + self.px_line0.set_data([], []) + self.px_line1.set_data([], []) + self.px_line2.set_data([], []) + plt.legend(['value function', 'np.dot(Li, p(x))']) + return self.v_line, self.px_line0, self.px_line1, self.px_line2 + + def anim_animate(self, i): + + # calculate the weights for the actions + self.wu = np.zeros((self.num_systems, len(self.u))) + for ii, Li in enumerate(self.L): + for jj in range(self.num_systems): + if self.highlander_mode is False: + self.wu[jj,ii] = min(1, + max(0, np.dot(self.v, np.dot(Li, self.px[jj])))) + else: + # don't clip it here so we can tell the actual winner + self.wu[jj,ii] = np.dot(self.v, np.dot(Li, self.px[jj])) + # select the strongest action + if self.highlander_mode is True: + for ii in range(self.num_systems): + index = self.wu[ii].argmax() + val = self.wu[ii, index] + self.wu[ii] = np.zeros(len(self.u)) + # now clip it + self.wu[ii,index] = min(1, val) + + print self.wu + + # track information for plotting + self.track_position.append(np.copy(self.x)) + # get edges of value function + road = np.where(self.v == 1) + self.track_target.append(np.array([self.domain[road[0][0]], # left edge + self.domain[road[0][-1]]])) # right edge + # 
simulate dynamics and get new state + self.physics(np.dot(self.wu, self.u)) + # move the target around slowly + self.make_v(np.sin(i*.1)*5) + + + self.px_line0.set_data(range(self.num_states), self.px[0]) + self.px_line1.set_data(range(self.num_states), self.px[1]) + self.px_line2.set_data(range(self.num_states), self.px[2]) + self.v_line.set_data(range(self.num_states), self.v) + + return self.v_line, self.px_line0, self.px_line1, self.px_line2 + + def run(self): + fig = plt.figure() + ax = fig.add_subplot(111) + self.v_line, = ax.plot([],[], color='r', lw=3) + self.px_line0, = ax.plot([],[], color='k', lw=3) + self.px_line1, = ax.plot([],[], color='k', lw=3) + self.px_line2, = ax.plot([],[], color='k', lw=3) + + plt.xlim([0, self.num_states-1]) + plt.xticks(np.linspace(0, self.num_states, 11), np.linspace(-10, 10, 11)) + plt.ylim([-1, 1]) + + anim = animation.FuncAnimation(fig, self.anim_animate, + init_func=self.anim_init, frames=500, + interval=100, blit=True) + plt.show() + +if __name__ == '__main__': + + runner = Runner() + runner.run() + + # generate some nice plots + fig = plt.figure(figsize=(8, 8)) + runner.track_position = np.array(runner.track_position) + time = runner.track_position.shape[0] + X = np.arange(0, time) + Y = runner.domain + X, Y = np.meshgrid(X, Y) + for ii in range(runner.num_systems): + # plot borders, and path and heatmap for system ii + plt.subplot(runner.num_systems+1, 1, ii+1) + plt.plot(runner.track_position[:,ii], 'b', lw=5) # plot actual position of each system + plt.plot(runner.track_target, 'r--', lw=3) # plot road boundaries + # plot a heat map showing sensor information + heatmap = np.zeros((runner.num_states, time)) + for jj in range(time): + heatmap[:,jj] = runner.make_gauss(mean=runner.track_position[jj, ii], + var=runner.var[ii]) + plt.pcolormesh(X, Y, heatmap, cmap='terrain_r') + + + plt.title('Variance = %.2f'%runner.var[ii]) + plt.xlim([0, time-1]) + + # plot the borders and path of each + plt.subplot(runner.num_systems+1, 1, 4) + plt.plot(runner.track_position[:,ii], lw=3) # plot actual position of each system + plt.plot(runner.track_target, 'r--', lw=3) # plot road boundaries + plt.xlim([0, time-1]) + plt.legend(runner.var, frameon=True, bbox_to_anchor=[1, 1.05]) + plt.xlabel('Time') + + plt.tight_layout() + plt.show() diff --git a/risk_aware_control/riskaware_learning.py b/risk_aware_control/riskaware_learning.py new file mode 100644 index 0000000..9048c1f --- /dev/null +++ b/risk_aware_control/riskaware_learning.py @@ -0,0 +1,201 @@ +import numpy as np +import seaborn +import matplotlib.pyplot as plt +from matplotlib import animation + +class Runner: + + def __init__(self): + self.num_states = 400 + self.limit = 10 + self.domain = np.linspace(-self.limit, + self.limit, + self.num_states) + + self.x = 0 # initial position + self.var = .5 # variance in sensory information + # the initial state probability + self.px = self.make_gauss() + + self.drift = 0#-.15 # constant slide of the system in this direction + + # action set + self.u = np.array([0, .5, -.5]) + # for learning, start with no knowledge about actions + self.L = [(np.random.random((self.num_states,self.num_states))*2-1)*1e-1 \ + for ii in range(len(self.u))] + # for ii in range(len(self.u)): + # np.fill_diagonal(self.L[ii], -1*np.ones(self.num_states)) + + # create some more for plotting later + self.L_init = np.copy(self.L) + self.L_actual = [np.zeros((self.num_states, self.num_states)) \ + for ii in range(len(self.u))] + for ii in range(len(self.u)): + for jj in 
range(self.num_states): + self.x = self.domain[jj] + px_old = self.make_gauss(mean=self.x) + px_old /= np.max(px_old) + px_old[np.where(px_old < 0)] = 0.0 + self.physics(self.u[ii]) + self.L_actual[ii][jj] = np.copy(self.px - px_old) + + # offset = int((u - self.drift)/ 20.0 * 400.0) + # self.L_actual.append( + # # moves away from current state + # np.diag(np.ones(self.num_states)) * -1 + + # # moves into state + u + # np.diag(np.ones(self.num_states-abs(offset)), -offset)) + self.x = 0 # initial position + + self.gamma = 1e-1 # learning rate + + # also need a cost function (Gaussian to move towards the center) + self.make_v() + + self.track_position = [] + self.track_target = [] + + def make_gauss(self, mean=0, var=.5): + return np.exp(-(self.domain-mean)**2 / (2*var**2)) + + def make_v(self, mean=0): + self.v = self.make_gauss(mean=mean,var=2) + self.make_gauss(mean=mean,var=.01) + self.v = self.v * 2 - 1 + self.v[np.where(self.v > 0)] = 1.0 + + def physics(self, u): + self.x += (self.drift + u) # simple physics + self.x = min(self.limit, max(-self.limit, self.x)) + self.px = self.make_gauss(self.x, self.var) + self.px /= np.max(self.px) + # clip at zero and normalize px + self.px[np.where(self.px < 0)] = 0.0 + + def anim_init(self): + self.v_line.set_data([], []) + self.px_line.set_data([], []) + self.Lpx_line0.set_data([], []) + self.Lpx_line1.set_data([], []) + self.Lpx_line2.set_data([], []) + plt.legend(['value function', 'px', 'L0*px', 'L1*px', 'L2*px']) + return self.v_line, self.px_line, self.Lpx_line0, self.Lpx_line1, self.Lpx_line2 + + def anim_animate(self, i): + + # calculate the weights for the actions + self.wu = np.zeros(len(self.u)) + for ii, Li in enumerate(self.L): + # don't clip it here so we can tell the actual winner + self.wu[ii] = np.dot(self.v, np.dot(Li, self.px)) + + # set losers to zero (there can be only one action selected) + index = self.wu.argmax() + # add in some exploration + if int(np.random.random()*10) == 5: + print 'here' + index = np.random.choice(range(3)) + val = self.wu[index] + self.wu = np.zeros(len(self.u)) + # now clip it + self.wu[index] = 1# min(1, val) + # store the predicted change in probability + self.dpx_estimate = np.dot(self.L[index], self.px) + # also store the old px for calculating dpx + self.old_px = np.copy(self.px) + + # track information for plotting + self.track_position.append(np.copy(self.x)) + # get edges of value function + road = np.where(self.v == 1) + self.track_target.append(np.array([self.domain[road[0][0]], # left edge + self.domain[road[0][-1]]])) # right edge + + # simulate dynamics and get new state + self.physics(np.dot(self.wu, self.u)) + # move the target around slowly + self.make_v(np.sin(i*.01)*9) + + # do our learning + err = (self.px - self.old_px) - self.dpx_estimate + learn = self.gamma * np.outer(err, self.px) # learning_rate * err * activities + self.L[index] += learn + + self.px_line.set_data(range(self.num_states), self.px) + self.Lpx_line0.set_data(range(self.num_states), np.dot(self.L[0], self.px)) + self.Lpx_line1.set_data(range(self.num_states), np.dot(self.L[1], self.px)) + self.Lpx_line2.set_data(range(self.num_states), np.dot(self.L[2], self.px)) + self.v_line.set_data(range(self.num_states), self.v) + + return self.v_line, self.px_line, self.Lpx_line0, self.Lpx_line1, self.Lpx_line2 + + def run(self): + fig = plt.figure() + ax = fig.add_subplot(111) + self.v_line, = ax.plot([],[], color='r', lw=3) + self.px_line, = ax.plot([],[], color='k', lw=3) + self.Lpx_line0, = ax.plot([],[], color='b', 
lw=3) + self.Lpx_line1, = ax.plot([],[], color='g', lw=3) + self.Lpx_line2, = ax.plot([],[], color='y', lw=3) + + plt.xlim([0, self.num_states-1]) + plt.xticks(np.linspace(0, self.num_states, 11), np.linspace(-10, 10, 11)) + plt.ylim([-1, 1]) + + anim = animation.FuncAnimation(fig, self.anim_animate, + init_func=self.anim_init, frames=10000, + interval=0, blit=True) + plt.show() + +if __name__ == '__main__': + + runner = Runner() + runner.run() + + # generate some nice plots + axes = [] + X, Y = np.meshgrid(runner.domain, runner.domain) + plt.figure(figsize=(9,9)) + for ii in range(len(runner.u)): + # axes.append(plt.subplot(1,3,ii)) + # plt.axis('equal') + # runner.L[ii][0,0] = 1 + # runner.L[ii][0,1] = -1 + # plt.pcolormesh(X, Y, runner.L[ii])#, cmap='terrain_r') + + # plot the starting vs ending vs actual L operators + # plot a heat map showing sensor information + axes.append(plt.subplot(4, len(runner.u), ii+1)) + plt.axis('equal') + runner.L_init[ii][0,0] = 1 + runner.L_init[ii][0,1] = -1 + plt.pcolormesh(X, Y, runner.L_init[ii]) + + axes.append(plt.subplot(4, len(runner.u), ii+1+len(runner.u))) + runner.L[ii][0,0] = 1 + runner.L[ii][0,1] = -1 + plt.axis('equal') + plt.pcolormesh(X, Y, runner.L[ii]) + + axes.append(plt.subplot(4, len(runner.u), (ii+1+2*len(runner.u)))) + plt.axis('equal') + runner.L_actual[ii][0,0] = 1 + runner.L_actual[ii][0,1] = -1 + plt.pcolormesh(X, Y, runner.L_actual[ii]) + + axes.append(plt.subplot(4, len(runner.u), (ii+1+3*len(runner.u)))) + plt.axis('equal') + diff = runner.L[ii] - runner.L_actual[ii] + diff[0,0] = 1 + diff[0,1] = -1 + plt.pcolormesh(X, Y, diff) + + print np.diag(runner.L[1]) + print np.diag(runner.L_actual[1]) + + axes[0].set_ylabel('Initial L operator') + axes[1].set_ylabel('Learned L operator') + axes[2].set_ylabel('Actual L operator') + plt.suptitle('Learning L operators') + # plt.tight_layout() + plt.show() From 452e8fe4709545b038440ddaca18bbdc0bd48211 Mon Sep 17 00:00:00 2001 From: travis dewolf Date: Fri, 15 Apr 2016 10:53:17 -0400 Subject: [PATCH 02/10] update to ra --- risk_aware_control/riskaware.py | 14 +++++++++----- risk_aware_control/riskaware_learning.py | 24 ++++++++++-------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/risk_aware_control/riskaware.py b/risk_aware_control/riskaware.py index 8305b3d..b56fec6 100644 --- a/risk_aware_control/riskaware.py +++ b/risk_aware_control/riskaware.py @@ -7,7 +7,10 @@ class Runner: def __init__(self): self.num_states = 400 - self.domain = np.linspace(-10, 10, self.num_states) + self.limit = 10 + self.domain = np.linspace(-self.limit, + self.limit, + self.num_states) self.num_systems = 3 self.x = np.zeros(self.num_systems) @@ -16,11 +19,11 @@ def __init__(self): self.px = np.vstack([self.make_gauss() for ii in range(self.num_systems)]) self.drift = -.15 # systems drifts left - self.highlander_mode = True # allow more than one action at a time? + self.highlander_mode = False # allow more than one action at a time? 
- # have a controller that pushes us left - # self.u = np.array([1, -1, 5, -5, 10, -10]) + # action set self.u = np.array([0, .5, -.5]) + self.L = [] for u in self.u: offset = int(u / 20.0 * 400.0) @@ -40,7 +43,8 @@ def make_gauss(self, mean=0, var=.5): return np.exp(-(self.domain-mean)**2 / (2*var**2)) def make_v(self, mean=0): - self.v = self.make_gauss(mean=mean,var=2) + self.make_gauss(mean=mean,var=.01) + self.v = self.make_gauss(mean=mean,var=2) + \ + self.make_gauss(mean=mean,var=.01) self.v = self.v * 5 - 1 self.v[np.where(self.v > 0)] = 1.0 diff --git a/risk_aware_control/riskaware_learning.py b/risk_aware_control/riskaware_learning.py index 9048c1f..fd438d0 100644 --- a/risk_aware_control/riskaware_learning.py +++ b/risk_aware_control/riskaware_learning.py @@ -24,8 +24,6 @@ def __init__(self): # for learning, start with no knowledge about actions self.L = [(np.random.random((self.num_states,self.num_states))*2-1)*1e-1 \ for ii in range(len(self.u))] - # for ii in range(len(self.u)): - # np.fill_diagonal(self.L[ii], -1*np.ones(self.num_states)) # create some more for plotting later self.L_init = np.copy(self.L) @@ -33,19 +31,15 @@ def __init__(self): for ii in range(len(self.u))] for ii in range(len(self.u)): for jj in range(self.num_states): + # set the system state self.x = self.domain[jj] + # measure the probability distribution of x px_old = self.make_gauss(mean=self.x) - px_old /= np.max(px_old) - px_old[np.where(px_old < 0)] = 0.0 + # apply the control signal self.physics(self.u[ii]) - self.L_actual[ii][jj] = np.copy(self.px - px_old) - - # offset = int((u - self.drift)/ 20.0 * 400.0) - # self.L_actual.append( - # # moves away from current state - # np.diag(np.ones(self.num_states)) * -1 + - # # moves into state + u - # np.diag(np.ones(self.num_states-abs(offset)), -offset)) + # calculate the change in the probability distribution + self.L_actual[ii][:,jj] = np.copy(self.px - px_old) + self.x = 0 # initial position self.gamma = 1e-1 # learning rate @@ -60,7 +54,8 @@ def make_gauss(self, mean=0, var=.5): return np.exp(-(self.domain-mean)**2 / (2*var**2)) def make_v(self, mean=0): - self.v = self.make_gauss(mean=mean,var=2) + self.make_gauss(mean=mean,var=.01) + self.v = self.make_gauss(mean=mean,var=2) + \ + self.make_gauss(mean=mean,var=.01) self.v = self.v * 2 - 1 self.v[np.where(self.v > 0)] = 1.0 @@ -93,7 +88,6 @@ def anim_animate(self, i): index = self.wu.argmax() # add in some exploration if int(np.random.random()*10) == 5: - print 'here' index = np.random.choice(range(3)) val = self.wu[index] self.wu = np.zeros(len(self.u)) @@ -121,6 +115,7 @@ def anim_animate(self, i): learn = self.gamma * np.outer(err, self.px) # learning_rate * err * activities self.L[index] += learn + # update the line plots self.px_line.set_data(range(self.num_states), self.px) self.Lpx_line0.set_data(range(self.num_states), np.dot(self.L[0], self.px)) self.Lpx_line1.set_data(range(self.num_states), np.dot(self.L[1], self.px)) @@ -196,6 +191,7 @@ def run(self): axes[0].set_ylabel('Initial L operator') axes[1].set_ylabel('Learned L operator') axes[2].set_ylabel('Actual L operator') + axes[3].set_ylabel('Difference') plt.suptitle('Learning L operators') # plt.tight_layout() plt.show() From 7af17f8042dc5e653cc3b7427da0f86f51ca3fd0 Mon Sep 17 00:00:00 2001 From: travis dewolf Date: Tue, 10 May 2016 20:33:19 -0400 Subject: [PATCH 03/10] moved riskaware.py to using gen_px --- risk_aware_control/riskaware.py | 53 ++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 11 deletions(-) diff 
--git a/risk_aware_control/riskaware.py b/risk_aware_control/riskaware.py index b56fec6..7c55f35 100644 --- a/risk_aware_control/riskaware.py +++ b/risk_aware_control/riskaware.py @@ -8,6 +8,7 @@ class Runner: def __init__(self): self.num_states = 400 self.limit = 10 + self.dx = self.limit * 2.0 / self.num_states self.domain = np.linspace(-self.limit, self.limit, self.num_states) @@ -24,6 +25,22 @@ def __init__(self): # action set self.u = np.array([0, .5, -.5]) + # self.L = [np.zeros((self.num_states, self.num_states)) \ + # for ii in range(len(self.u))] + # for ii in range(len(self.u)): + # for jj in range(self.num_states): + # # set the system state + # self.x = self.domain[jj] + # # get the probability distribution of x + # old_px = self.gen_px() + # # apply the control signal + # self.physics(self.u[ii]) + # # get the new probability distribution of x + # px = self.gen_px() + # # calculate the change in the probability distribution + # # self.L_actual[ii][jj] = np.copy(px_old - px) + # self.L_actual[ii][:,jj] = np.copy(px - old_px) + self.L = [] for u in self.u: offset = int(u / 20.0 * 400.0) @@ -51,10 +68,22 @@ def make_v(self, mean=0): def physics(self, u): for ii in range(self.num_systems): self.x[ii] += (self.drift + u[ii]) # simple physics - self.px[ii] = self.make_gauss(self.x[ii], self.var[ii]) - self.px[ii] /= np.max(self.px[ii]) - # clip at zero and normalize px - self.px[ii][np.where(self.px[ii] < 0)] = 0.0 + self.x[ii] = min(self.limit, max(-self.limit, self.x[ii])) + + def gen_px(self, x=None, var=None): + x = np.copy(self.x) if x is None else x + var = self.var if var is None else var + + px = np.zeros((self.num_systems, self.num_states)) + # TODO: rewrite this to avoid the for loop just using matrices + for ii in range(self.num_systems): + px[ii] = self.make_gauss(x[ii], var[ii]) + # make sure no negative values + px[ii][np.where(px[ii] < 0)] = 0.0 + # make sure things sum to 1 + px[ii] /= np.max(np.sum(px[ii]) * self.dx) + return px + def anim_init(self): self.v_line.set_data([], []) @@ -70,12 +99,13 @@ def anim_animate(self, i): self.wu = np.zeros((self.num_systems, len(self.u))) for ii, Li in enumerate(self.L): for jj in range(self.num_systems): - if self.highlander_mode is False: - self.wu[jj,ii] = min(1, - max(0, np.dot(self.v, np.dot(Li, self.px[jj])))) - else: + # check to see if there can be only one + if self.highlander_mode is True: # don't clip it here so we can tell the actual winner self.wu[jj,ii] = np.dot(self.v, np.dot(Li, self.px[jj])) + else: + self.wu[jj,ii] = min(1, + max(0, np.dot(self.v, np.dot(Li, self.px[jj])))) # select the strongest action if self.highlander_mode is True: for ii in range(self.num_systems): @@ -85,16 +115,17 @@ def anim_animate(self, i): # now clip it self.wu[ii,index] = min(1, val) - print self.wu - # track information for plotting self.track_position.append(np.copy(self.x)) # get edges of value function road = np.where(self.v == 1) self.track_target.append(np.array([self.domain[road[0][0]], # left edge self.domain[road[0][-1]]])) # right edge - # simulate dynamics and get new state + # apply the control signal, simulate dynamics and get new state self.physics(np.dot(self.wu, self.u)) + # get the new probability distribution of x + self.px = self.gen_px() + # move the target around slowly self.make_v(np.sin(i*.1)*5) From cf31068cd087bc26b658b557b253e8b5254a7fdf Mon Sep 17 00:00:00 2001 From: travis dewolf Date: Tue, 10 May 2016 20:33:58 -0400 Subject: [PATCH 04/10] riskaware_learning working properly --- 
risk_aware_control/riskaware_learning.py | 57 +++++++++++++++--------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/risk_aware_control/riskaware_learning.py b/risk_aware_control/riskaware_learning.py index fd438d0..d22ce6a 100644 --- a/risk_aware_control/riskaware_learning.py +++ b/risk_aware_control/riskaware_learning.py @@ -6,21 +6,25 @@ class Runner: def __init__(self): - self.num_states = 400 - self.limit = 10 + self.num_states = 200 + self.limit = 20 + self.dx = self.limit * 2.0 / self.num_states self.domain = np.linspace(-self.limit, self.limit, self.num_states) self.x = 0 # initial position - self.var = .5 # variance in sensory information - # the initial state probability - self.px = self.make_gauss() + self.var = .4 # variance in sensory information + self.px = self.make_gauss() # the initial state probability - self.drift = 0#-.15 # constant slide of the system in this direction + # constant slide of the system in this direction + self.drift = 1.5 + + # how often should the system randomly choose an action (0-1) + self.exploration = 0.0 # action set - self.u = np.array([0, .5, -.5]) + self.u = np.array([0, .5, -1, 3, -5]) # for learning, start with no knowledge about actions self.L = [(np.random.random((self.num_states,self.num_states))*2-1)*1e-1 \ for ii in range(len(self.u))] @@ -33,15 +37,18 @@ def __init__(self): for jj in range(self.num_states): # set the system state self.x = self.domain[jj] - # measure the probability distribution of x - px_old = self.make_gauss(mean=self.x) + # get the probability distribution of x + old_px = self.gen_px() # apply the control signal self.physics(self.u[ii]) + # get the new probability distribution of x + px = self.gen_px() # calculate the change in the probability distribution - self.L_actual[ii][:,jj] = np.copy(self.px - px_old) + # self.L_actual[ii][jj] = np.copy(px_old - px) + self.L_actual[ii][:,jj] = np.copy(px - old_px) + # self.L[ii] = np.copy(self.L_actual[ii]) self.x = 0 # initial position - self.gamma = 1e-1 # learning rate # also need a cost function (Gaussian to move towards the center) @@ -60,12 +67,19 @@ def make_v(self, mean=0): self.v[np.where(self.v > 0)] = 1.0 def physics(self, u): - self.x += (self.drift + u) # simple physics + self.x += self.drift + u # simple physics self.x = min(self.limit, max(-self.limit, self.x)) - self.px = self.make_gauss(self.x, self.var) - self.px /= np.max(self.px) - # clip at zero and normalize px - self.px[np.where(self.px < 0)] = 0.0 + + def gen_px(self, x=None, var=None): + x = np.copy(self.x) if x is None else x + var = self.var if var is None else var + + px = self.make_gauss(x, var) + # make sure no negative values + px[np.where(px < 0)] = 0.0 + # make sure things sum to 1 + px /= np.max(np.sum(px) * self.dx) + return px def anim_init(self): self.v_line.set_data([], []) @@ -107,12 +121,16 @@ def anim_animate(self, i): # simulate dynamics and get new state self.physics(np.dot(self.wu, self.u)) + # generate the new px + self.px = self.gen_px() # move the target around slowly - self.make_v(np.sin(i*.01)*9) + self.make_v(np.sin(i*.01)*(self.limit-1)) # do our learning err = (self.px - self.old_px) - self.dpx_estimate - learn = self.gamma * np.outer(err, self.px) # learning_rate * err * activities + # NOTE: need to use old_px because that's what our + # dpx_estimate was calculated for + learn = self.gamma * np.outer(err, self.old_px) # learning_rate * err * activities self.L[index] += learn # update the line plots @@ -185,9 +203,6 @@ def run(self): diff[0,1] = -1 
plt.pcolormesh(X, Y, diff) - print np.diag(runner.L[1]) - print np.diag(runner.L_actual[1]) - axes[0].set_ylabel('Initial L operator') axes[1].set_ylabel('Learned L operator') axes[2].set_ylabel('Actual L operator') From 24b84550a0244dd645c45dcbdbcc5e2272e77ee2 Mon Sep 17 00:00:00 2001 From: travis dewolf Date: Fri, 20 May 2016 10:11:08 -0400 Subject: [PATCH 05/10] reworked riskaware control, sample to gen L, better plots, weight selection --- risk_aware_control/riskaware.py | 191 +++++++++----- risk_aware_control/riskaware_learning.py | 2 - .../riskaware_learning_multiple.py | 216 ++++++++++++++++ risk_aware_control/riskaware_neurons.py | 242 ++++++++++++++++++ 4 files changed, 580 insertions(+), 71 deletions(-) create mode 100644 risk_aware_control/riskaware_learning_multiple.py create mode 100644 risk_aware_control/riskaware_neurons.py diff --git a/risk_aware_control/riskaware.py b/risk_aware_control/riskaware.py index 7c55f35..036cd22 100644 --- a/risk_aware_control/riskaware.py +++ b/risk_aware_control/riskaware.py @@ -6,64 +6,57 @@ class Runner: def __init__(self): - self.num_states = 400 + self.num_states = 200 self.limit = 10 self.dx = self.limit * 2.0 / self.num_states self.domain = np.linspace(-self.limit, self.limit, - self.num_states) + self.num_states)[:,None] self.num_systems = 3 - self.x = np.zeros(self.num_systems) self.var = [3, 1, .5] - # the initial state probability - self.px = np.vstack([self.make_gauss() for ii in range(self.num_systems)]) - self.drift = -.15 # systems drifts left + self.drift = 0#-.15 # systems drifts left self.highlander_mode = False # allow more than one action at a time? # action set self.u = np.array([0, .5, -.5]) - # self.L = [np.zeros((self.num_states, self.num_states)) \ - # for ii in range(len(self.u))] - # for ii in range(len(self.u)): - # for jj in range(self.num_states): - # # set the system state - # self.x = self.domain[jj] - # # get the probability distribution of x - # old_px = self.gen_px() - # # apply the control signal - # self.physics(self.u[ii]) - # # get the new probability distribution of x - # px = self.gen_px() - # # calculate the change in the probability distribution - # # self.L_actual[ii][jj] = np.copy(px_old - px) - # self.L_actual[ii][:,jj] = np.copy(px - old_px) + self.L = np.zeros((len(self.var), + len(self.u), self.num_states, self.num_states)) + for ii in range(len(self.u)): + for jj in range(self.num_states): + # set the system state + self.x = np.ones(self.num_states) * self.domain[jj] + # get the probability distribution of x + old_px = self.gen_px() + # apply the control signal + self.physics(np.ones(self.num_systems) * self.u[ii]) + # get the new probability distribution of x + px = self.gen_px() + # calculate the change in the probability distribution + self.L[:, ii, :, jj] = np.copy(px - old_px) - self.L = [] - for u in self.u: - offset = int(u / 20.0 * 400.0) - self.L.append( - # moves away from current state - np.diag(np.ones(self.num_states)) * -1 + - # moves into state + u - np.diag(np.ones(self.num_states-abs(offset)), -offset)) + # the initial state probability + self.x = np.zeros(self.num_systems) + self.px = self.gen_px() # also need a cost function (Gaussian to move towards the center) self.make_v() self.track_position = [] - self.track_target = [] + self.track_target1 = [] + self.track_target2 = [] def make_gauss(self, mean=0, var=.5): - return np.exp(-(self.domain-mean)**2 / (2*var**2)) + return np.exp(-(self.domain-mean)**2 / (2.0*var**2)) def make_v(self, mean=0): - self.v = 
self.make_gauss(mean=mean,var=2) + \ - self.make_gauss(mean=mean,var=.01) - self.v = self.v * 5 - 1 - self.v[np.where(self.v > 0)] = 1.0 + # set up the road + self.v = self.make_gauss(mean=mean,var=2) * 5 - 1 + self.v[np.where(self.v > .5)] = .5 + # make a preference for being in the right lane + self.v += self.make_gauss(mean=mean+2,var=.6) def physics(self, u): for ii in range(self.num_systems): @@ -77,7 +70,7 @@ def gen_px(self, x=None, var=None): px = np.zeros((self.num_systems, self.num_states)) # TODO: rewrite this to avoid the for loop just using matrices for ii in range(self.num_systems): - px[ii] = self.make_gauss(x[ii], var[ii]) + px[ii] = self.make_gauss(x[ii], var[ii]).flatten() # make sure no negative values px[ii][np.where(px[ii] < 0)] = 0.0 # make sure things sum to 1 @@ -97,30 +90,42 @@ def anim_animate(self, i): # calculate the weights for the actions self.wu = np.zeros((self.num_systems, len(self.u))) - for ii, Li in enumerate(self.L): - for jj in range(self.num_systems): - # check to see if there can be only one - if self.highlander_mode is True: - # don't clip it here so we can tell the actual winner - self.wu[jj,ii] = np.dot(self.v, np.dot(Li, self.px[jj])) - else: - self.wu[jj,ii] = min(1, - max(0, np.dot(self.v, np.dot(Li, self.px[jj])))) - # select the strongest action - if self.highlander_mode is True: - for ii in range(self.num_systems): - index = self.wu[ii].argmax() - val = self.wu[ii, index] - self.wu[ii] = np.zeros(len(self.u)) - # now clip it - self.wu[ii,index] = min(1, val) + # for ii in range(len(self.u)): + # for jj in range(self.num_systems): + # # check to see if there can be only one + # if self.highlander_mode is True: + # # don't clip it here so we can tell the actual winner + # self.wu[jj,ii] = np.dot(self.v, np.dot(self.L[:, ii], self.px[jj])) + # else: + # self.wu[jj,ii] = min(1, + # max(0, np.dot(self.v.T, np.dot(self.L[jj,ii], self.px[jj])))) + + for ii in range(self.num_systems): + for jj in range(len(self.u)): + # constrain so that you can only weight actions positively + self.wu[ii,jj] = max(0, np.dot(self.v.T, np.dot(self.L[ii,jj], self.px[ii]))) + if np.sum(self.wu[ii]) != 0: + # constrain so that total output power sum_j u_j**2 = 1 + self.wu[ii] /= np.sqrt(np.sum(self.wu[ii]**2)) + + # # select the strongest action + # if self.highlander_mode is True: + # for ii in range(self.num_systems): + # index = self.wu[ii].argmax() + # val = self.wu[ii, index] + # self.wu[ii] = np.zeros(len(self.u)) + # # now clip it + # self.wu[ii,index] = min(1, val) # track information for plotting self.track_position.append(np.copy(self.x)) # get edges of value function - road = np.where(self.v == 1) - self.track_target.append(np.array([self.domain[road[0][0]], # left edge + road = np.where(self.v >= .5) + self.track_target1.append(np.array([self.domain[road[0][0]], # left edge self.domain[road[0][-1]]])) # right edge + lane = np.where(self.v >= .6) + self.track_target2.append(np.array([self.domain[lane[0][0]], # left edge + self.domain[lane[0][-1]]])) # right edge # apply the control signal, simulate dynamics and get new state self.physics(np.dot(self.wu, self.u)) # get the new probability distribution of x @@ -129,7 +134,6 @@ def anim_animate(self, i): # move the target around slowly self.make_v(np.sin(i*.1)*5) - self.px_line0.set_data(range(self.num_states), self.px[0]) self.px_line1.set_data(range(self.num_states), self.px[1]) self.px_line2.set_data(range(self.num_states), self.px[2]) @@ -152,6 +156,7 @@ def run(self): anim = animation.FuncAnimation(fig, 
self.anim_animate, init_func=self.anim_init, frames=500, interval=100, blit=True) + plt.show() if __name__ == '__main__': @@ -161,33 +166,81 @@ def run(self): # generate some nice plots fig = plt.figure(figsize=(8, 8)) + + # do some scaling to plot the same as seaborn heat plots + fix_for_plot = lambda x: (np.array(x).squeeze() / + runner.limit / -2.0 + .5) * runner.num_states + track_target1 = fix_for_plot(runner.track_target1) + track_target2 = fix_for_plot(runner.track_target2) + track_position = fix_for_plot(runner.track_position) runner.track_position = np.array(runner.track_position) - time = runner.track_position.shape[0] + + time = track_position.shape[0] X = np.arange(0, time) Y = runner.domain X, Y = np.meshgrid(X, Y) + + plt.subplot(runner.num_systems+2, 1, 1) + plt.title('Position on road') + + plt.subplot(runner.num_systems+2, 1, 4) + plt.fill_between(range(track_target2.shape[0]), + track_target2[:,0], track_target2[:,1], facecolor='orange', alpha=.25) + plt.fill_between(range(track_target1.shape[0]), + track_target1[:,0], track_target1[:,1], facecolor='y', alpha=.25) + + for ii in range(runner.num_systems): # plot borders, and path and heatmap for system ii - plt.subplot(runner.num_systems+1, 1, ii+1) - plt.plot(runner.track_position[:,ii], 'b', lw=5) # plot actual position of each system - plt.plot(runner.track_target, 'r--', lw=3) # plot road boundaries + plt.subplot(runner.num_systems+2, 1, ii+1) + # plot road boundaries + plt.plot(track_target1, 'r--', lw=5) + # plot a heat map showing sensor information heatmap = np.zeros((runner.num_states, time)) for jj in range(time): - heatmap[:,jj] = runner.make_gauss(mean=runner.track_position[jj, ii], - var=runner.var[ii]) - plt.pcolormesh(X, Y, heatmap, cmap='terrain_r') - - - plt.title('Variance = %.2f'%runner.var[ii]) + heatmap[:,jj] = runner.make_gauss( + mean=runner.track_position[jj, ii], + var=runner.var[ii]).flatten() + seaborn.heatmap(heatmap, xticklabels=False, yticklabels=False, + cbar=False, cmap='Blues') + + # plot filled in zones of desirability, first the road + plt.fill_between(range(track_target1.shape[0]), + track_target1[:,0], track_target1[:,1], + facecolor='y', alpha=.25) + # and now the lane + plt.fill_between(range(track_target2.shape[0]), + track_target2[:,0], track_target2[:,1], + facecolor='orange', alpha=.25) + + # plot actual position of each system + line, = plt.plot(track_position[:,ii], 'k', lw=3) + + plt.legend([line], ['Variance = %.2f'%runner.var[ii]], + frameon=True, bbox_to_anchor=[1,1.05]) plt.xlim([0, time-1]) + plt.ylabel('Position') # plot the borders and path of each - plt.subplot(runner.num_systems+1, 1, 4) - plt.plot(runner.track_position[:,ii], lw=3) # plot actual position of each system - plt.plot(runner.track_target, 'r--', lw=3) # plot road boundaries + ax = plt.subplot(runner.num_systems+2, 1, 4) + + plt.plot(track_position[:,ii], lw=3) # plot actual position of each system + plt.xlim([0, time-1]) plt.legend(runner.var, frameon=True, bbox_to_anchor=[1, 1.05]) + plt.ylabel('Position') + ax.set_xticklabels([]) + ax.set_yticklabels([]) + + plt.subplot(runner.num_systems+2, 1, 5) + target_center = np.sin(X[0]*.1)*5 + 2 + plt.plot((runner.track_position[:,0] - target_center)**2, lw=2) + plt.plot((runner.track_position[:,1] - target_center)**2, lw=2) + plt.plot((runner.track_position[:,2] - target_center)**2, lw=2) + plt.legend(runner.var, frameon=True, bbox_to_anchor=[1, 1.05]) + plt.title('Distance from center of lane') + plt.ylabel('Squared error') plt.xlabel('Time') plt.tight_layout() 
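As a reading aid between the two file diffs in this patch (not part of the patch itself): the riskaware.py changes above replace winner-take-all selection with a soft weighting, where each action j is scored by v^T L_j p(x), negative scores are clipped to zero, and the weights are rescaled so the summed squared weights equal one. A minimal standalone sketch of that rule, with toy sizes and random operators standing in for the script's L matrices:

# Sketch only: score actions by v^T L_j p(x), clip negatives, normalize output power.
# The sizes and random L_j below are illustrative assumptions, not the repository's values.
import numpy as np

num_states, num_actions = 10, 3
rng = np.random.RandomState(0)

v = rng.randn(num_states)                             # value function over states
L = rng.randn(num_actions, num_states, num_states)    # one operator per action
px = rng.rand(num_states)
px /= np.sum(px)                                      # state probability distribution

wu = np.array([v.dot(L[j].dot(px)) for j in range(num_actions)])
wu = np.maximum(0, wu)                                # only weight actions positively
if np.sum(wu) != 0:
    wu /= np.sqrt(np.sum(wu ** 2))                    # constrain total output power: sum_j wu_j**2 = 1

print(wu)                                             # blended action weights

Because the weights are only power-normalized rather than forced to a single winner, several actions can blend, which is consistent with highlander_mode being removed in the next patch.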
diff --git a/risk_aware_control/riskaware_learning.py b/risk_aware_control/riskaware_learning.py index d22ce6a..7a21473 100644 --- a/risk_aware_control/riskaware_learning.py +++ b/risk_aware_control/riskaware_learning.py @@ -44,9 +44,7 @@ def __init__(self): # get the new probability distribution of x px = self.gen_px() # calculate the change in the probability distribution - # self.L_actual[ii][jj] = np.copy(px_old - px) self.L_actual[ii][:,jj] = np.copy(px - old_px) - # self.L[ii] = np.copy(self.L_actual[ii]) self.x = 0 # initial position self.gamma = 1e-1 # learning rate diff --git a/risk_aware_control/riskaware_learning_multiple.py b/risk_aware_control/riskaware_learning_multiple.py new file mode 100644 index 0000000..31b8c13 --- /dev/null +++ b/risk_aware_control/riskaware_learning_multiple.py @@ -0,0 +1,216 @@ +import numpy as np +import seaborn +import matplotlib.pyplot as plt +from matplotlib import animation + +class Runner: + """ The goal of this script is to be able to learn + the L operators of several actions simultaneously, + such that you don't have to choose only one action + at a time during training. """ + + def __init__(self): + self.num_states = 200 + self.limit = 20 + self.dx = self.limit * 2.0 / self.num_states + self.domain = np.linspace(-self.limit, + self.limit, + self.num_states) + + self.x = 0 # initial position + self.var = .4 # variance in sensory information + # the initial state probability + self.px = self.make_gauss() + + self.drift = 1.5 # constant slide of the system in this direction + + # action set + self.u = np.array([0, .5, -1, 3, -5]) + # for learning, start with no knowledge about actions + self.L = [(np.random.random((self.num_states,self.num_states))*2-1)*1e-1 \ + for ii in range(len(self.u))] + + # create some more for plotting later + self.L_init = np.copy(self.L) + self.L_actual = [np.zeros((self.num_states, self.num_states)) \ + for ii in range(len(self.u))] + for ii in range(len(self.u)): + for jj in range(self.num_states): + # set the system state + self.x = self.domain[jj] + # get the probability distribution of x + old_px = self.gen_px() + # apply the control signal + self.physics(self.u[ii]) + # get the new probability distribution of x + px = self.gen_px() + # calculate the change in the probability distribution + # self.L_actual[ii][jj] = np.copy(px_old - px) + self.L_actual[ii][:,jj] = np.copy(px - old_px) + # self.L[ii] = np.copy(self.L_actual[ii]) + + self.x = 0 # initial position + self.gamma = 1e-1 # learning rate + + # also need a cost function (Gaussian to move towards the center) + self.make_v() + + self.track_position = [] + self.track_target = [] + + def make_gauss(self, mean=0, var=.5): + return np.exp(-(self.domain-mean)**2 / (2*var**2)) + + def make_v(self, mean=0): + self.v = self.make_gauss(mean=mean,var=2) + \ + self.make_gauss(mean=mean,var=.01) + self.v = self.v * 2 - 1 + self.v[np.where(self.v > 0)] = 1.0 + + def physics(self, u): + self.x += self.drift + u # simple physics + self.x = min(self.limit, max(-self.limit, self.x)) + + def gen_px(self, x=None, var=None, noise=False): + x = np.copy(self.x) if x is None else x + var = self.var if var is None else var + + if noise is True: + x += int(np.random.random() * 2) + x = min(self.limit, max(-self.limit, x)) + px = self.make_gauss(x, var) + # make sure no negative values + px[np.where(px < 0)] = 0.0 + # make sure things sum to 1 + px /= np.max(np.sum(px) * self.dx) + return px + + def anim_init(self): + self.v_line.set_data([], []) + self.px_line.set_data([], []) + 
self.Lpx_line0.set_data([], []) + self.Lpx_line1.set_data([], []) + self.Lpx_line2.set_data([], []) + plt.legend(['value function', 'px', 'L0*px', 'L1*px', 'L2*px']) + return self.v_line, self.px_line, self.Lpx_line0, self.Lpx_line1, self.Lpx_line2 + + def anim_animate(self, i): + + # calculate the weights for the actions + self.wu = np.zeros(len(self.u)) + for ii, Li in enumerate(self.L): + # don't clip it here so we can tell the actual winner + self.wu[ii] = np.dot(self.v, np.dot(Li, self.px)) + + # set losers to zero (there can be only one action selected) + index = self.wu.argmax() + # add in some exploration + if int(np.random.random()*10) == 5: + index = np.random.choice(range(3)) + val = self.wu[index] + self.wu = np.zeros(len(self.u)) + # now clip it + self.wu[index] = 1# min(1, val) + # store the predicted change in probability + self.dpx_estimate = np.dot(self.L[index], self.px) + # also store the old px for calculating dpx + self.old_px = np.copy(self.px) + + # track information for plotting + self.track_position.append(np.copy(self.x)) + # get edges of value function + road = np.where(self.v == 1) + self.track_target.append(np.array([self.domain[road[0][0]], # left edge + self.domain[road[0][-1]]])) # right edge + + # simulate dynamics and get new state + self.physics(np.dot(self.wu, self.u)) + # generate the new px + self.px = self.gen_px(noise=False) + # move the target around slowly + self.make_v(np.sin(i*.01)*(self.limit-1)) + + # do our learning + err = (self.px - self.old_px) - self.dpx_estimate + # NOTE: need to use old_px because that's what our + # dpx_estimate was calculated for + learn = self.gamma * np.outer(err, self.old_px) # learning_rate * err * activities + self.L[index] += learn + + # update the line plots + self.px_line.set_data(range(self.num_states), self.px) + self.Lpx_line0.set_data(range(self.num_states), np.dot(self.L[0], self.px)) + self.Lpx_line1.set_data(range(self.num_states), np.dot(self.L[1], self.px)) + self.Lpx_line2.set_data(range(self.num_states), np.dot(self.L[2], self.px)) + self.v_line.set_data(range(self.num_states), self.v) + + return self.v_line, self.px_line, self.Lpx_line0, self.Lpx_line1, self.Lpx_line2 + + def run(self): + fig = plt.figure() + ax = fig.add_subplot(111) + self.v_line, = ax.plot([],[], color='r', lw=3) + self.px_line, = ax.plot([],[], color='k', lw=3) + self.Lpx_line0, = ax.plot([],[], color='b', lw=3) + self.Lpx_line1, = ax.plot([],[], color='g', lw=3) + self.Lpx_line2, = ax.plot([],[], color='y', lw=3) + + plt.xlim([0, self.num_states-1]) + plt.xticks(np.linspace(0, self.num_states, 11), np.linspace(-10, 10, 11)) + plt.ylim([-1, 1]) + + anim = animation.FuncAnimation(fig, self.anim_animate, + init_func=self.anim_init, frames=10000, + interval=0, blit=True) + plt.show() + +if __name__ == '__main__': + + runner = Runner() + runner.run() + + # generate some nice plots + axes = [] + X, Y = np.meshgrid(runner.domain, runner.domain) + plt.figure(figsize=(9,9)) + for ii in range(len(runner.u)): + # axes.append(plt.subplot(1,3,ii)) + # plt.axis('equal') + # runner.L[ii][0,0] = 1 + # runner.L[ii][0,1] = -1 + # plt.pcolormesh(X, Y, runner.L[ii])#, cmap='terrain_r') + + # plot the starting vs ending vs actual L operators + # plot a heat map showing sensor information + axes.append(plt.subplot(4, len(runner.u), ii+1)) + plt.axis('equal') + runner.L_init[ii][0,0] = 1 + runner.L_init[ii][0,1] = -1 + plt.pcolormesh(X, Y, runner.L_init[ii]) + + axes.append(plt.subplot(4, len(runner.u), ii+1+len(runner.u))) + runner.L[ii][0,0] = 1 + 
runner.L[ii][0,1] = -1 + plt.axis('equal') + plt.pcolormesh(X, Y, runner.L[ii]) + + axes.append(plt.subplot(4, len(runner.u), (ii+1+2*len(runner.u)))) + plt.axis('equal') + runner.L_actual[ii][0,0] = 1 + runner.L_actual[ii][0,1] = -1 + plt.pcolormesh(X, Y, runner.L_actual[ii]) + + axes.append(plt.subplot(4, len(runner.u), (ii+1+3*len(runner.u)))) + plt.axis('equal') + diff = runner.L[ii] - runner.L_actual[ii] + diff[0,0] = 1 + diff[0,1] = -1 + plt.pcolormesh(X, Y, diff) + + axes[0].set_ylabel('Initial L operator') + axes[1].set_ylabel('Learned L operator') + axes[2].set_ylabel('Actual L operator') + axes[3].set_ylabel('Difference') + plt.suptitle('Learning L operators') + # plt.tight_layout() + plt.show() diff --git a/risk_aware_control/riskaware_neurons.py b/risk_aware_control/riskaware_neurons.py new file mode 100644 index 0000000..d066178 --- /dev/null +++ b/risk_aware_control/riskaware_neurons.py @@ -0,0 +1,242 @@ +import numpy as np +import seaborn +import matplotlib.pyplot as plt +from matplotlib import animation + +class Runner: + """ In this class we're hooking up 200 neurons to the 'car', + so that half of them drive the car to the left, and half of + them drive the car to the right. The risk-aware system needs + to control the car through these neurons. """ + + def __init__(self): + self.num_states = 200 + self.limit = 20 + self.dx = self.limit * 2.0 / self.num_states + self.domain = np.linspace(-self.limit, + self.limit, + self.num_states) + + self.x = 0 # initial position + self.var = .4 # variance in sensory information + # the initial state probability + self.px = self.make_gauss() + + self.drift = 1.5 # constant slide of the system in this direction + + # action set + import nengo + model = nengo.Network() + # I think that the way for this to work is for the input + # to these neurons to be something like direct stimulation, + # and relatively strong, so projecting in either 10 or 0 + # as the two options...at least initially. + # TODO: train up a system where the input can be a range of values + # NOTE: this would possibly just be a whole other set of actions... + # so maybe the way to think about it is that activating each individual + # neuron is an action, and so is not activating each neuron. And then + # if you want to be able to input a range of values that's just more actions. + with model: + ensemble = nengo.Ensemble(n_neurons=200, dimensions=1) # dimensions is irrelevant + + # TODO: is the input here supposed to be direct neural stimulation? 
+ def get_input(t): + return self.u + node_in = nengo.Node(output=get_input, size_out=ensemblen_neurons) + # send the input to directly stimulate each neuron + nengo.Connection(node_in, ensemble.neurons) + + def set_output(t, x): + self.neuron_activity = np.copy(x) + node_out = nengo.Node(output=set_output, size_in=1) + + # half the neurons push to the left (negative weight) + # half the neurons push to the right (positive weight) + weights = np.random.random((200,1)) * 10.0 - 5.0 + nengo.Connection(ensemble, node_out, + transform=weights) + # TODO: add in some noise + + # for learning, start with no knowledge about actions + self.L = [(np.random.random((self.num_states,self.num_states))*2-1)*1e-1 \ + for ii in range(len(self.u))] + + # create some more for plotting later + self.L_init = np.copy(self.L) + self.L_actual = [np.zeros((self.num_states, self.num_states)) \ + for ii in range(len(self.u))] + for ii in range(len(self.u)): + for jj in range(self.num_states): + # set the system state + self.x = self.domain[jj] + # get the probability distribution of x + old_px = self.gen_px() + # apply the control signal + self.physics(self.u[ii]) + # get the new probability distribution of x + px = self.gen_px() + # calculate the change in the probability distribution + # self.L_actual[ii][jj] = np.copy(px_old - px) + self.L_actual[ii][:,jj] = np.copy(px - old_px) + # self.L[ii] = np.copy(self.L_actual[ii]) + + self.x = 0 # initial position + self.gamma = 1e-1 # learning rate + + # also need a cost function (Gaussian to move towards the center) + self.make_v() + + self.track_position = [] + self.track_target = [] + + def make_gauss(self, mean=0, var=.5): + return np.exp(-(self.domain-mean)**2 / (2*var**2)) + + def make_v(self, mean=0): + self.v = self.make_gauss(mean=mean,var=2) + \ + self.make_gauss(mean=mean,var=.01) + self.v = self.v * 2 - 1 + self.v[np.where(self.v > 0)] = 1.0 + + def physics(self, u): + self.x += self.drift + u # simple physics + self.x = min(self.limit, max(-self.limit, self.x)) + + def gen_px(self, x=None, var=None, noise=False): + x = np.copy(self.x) if x is None else x + var = self.var if var is None else var + + if noise is True: + x += int(np.random.random() * 2) + x = min(self.limit, max(-self.limit, x)) + px = self.make_gauss(x, var) + # make sure no negative values + px[np.where(px < 0)] = 0.0 + # make sure things sum to 1 + px /= np.max(np.sum(px) * self.dx) + return px + + def anim_init(self): + self.v_line.set_data([], []) + self.px_line.set_data([], []) + self.Lpx_line0.set_data([], []) + self.Lpx_line1.set_data([], []) + self.Lpx_line2.set_data([], []) + plt.legend(['value function', 'px', 'L0*px', 'L1*px', 'L2*px']) + return self.v_line, self.px_line, self.Lpx_line0, self.Lpx_line1, self.Lpx_line2 + + def anim_animate(self, i): + + # calculate the weights for the actions + self.wu = np.zeros(len(self.u)) + for ii, Li in enumerate(self.L): + # don't clip it here so we can tell the actual winner + self.wu[ii] = np.dot(self.v, np.dot(Li, self.px)) + + # set losers to zero (there can be only one action selected) + index = self.wu.argmax() + # add in some exploration + if int(np.random.random()*10) == 5: + index = np.random.choice(range(3)) + val = self.wu[index] + self.wu = np.zeros(len(self.u)) + # now clip it + self.wu[index] = 1# min(1, val) + # store the predicted change in probability + self.dpx_estimate = np.dot(self.L[index], self.px) + # also store the old px for calculating dpx + self.old_px = np.copy(self.px) + + # track information for plotting + 
self.track_position.append(np.copy(self.x)) + # get edges of value function + road = np.where(self.v == 1) + self.track_target.append(np.array([self.domain[road[0][0]], # left edge + self.domain[road[0][-1]]])) # right edge + + # simulate dynamics and get new state + self.physics(np.dot(self.wu, self.u)) + # generate the new px + self.px = self.gen_px(noise=False) + # move the target around slowly + self.make_v(np.sin(i*.01)*(self.limit-1)) + + # do our learning + err = (self.px - self.old_px) - self.dpx_estimate + # NOTE: need to use old_px because that's what our + # dpx_estimate was calculated for + learn = self.gamma * np.outer(err, self.old_px) # learning_rate * err * activities + self.L[index] += learn + + # update the line plots + self.px_line.set_data(range(self.num_states), self.px) + self.Lpx_line0.set_data(range(self.num_states), np.dot(self.L[0], self.px)) + self.Lpx_line1.set_data(range(self.num_states), np.dot(self.L[1], self.px)) + self.Lpx_line2.set_data(range(self.num_states), np.dot(self.L[2], self.px)) + self.v_line.set_data(range(self.num_states), self.v) + + return self.v_line, self.px_line, self.Lpx_line0, self.Lpx_line1, self.Lpx_line2 + + def run(self): + fig = plt.figure() + ax = fig.add_subplot(111) + self.v_line, = ax.plot([],[], color='r', lw=3) + self.px_line, = ax.plot([],[], color='k', lw=3) + self.Lpx_line0, = ax.plot([],[], color='b', lw=3) + self.Lpx_line1, = ax.plot([],[], color='g', lw=3) + self.Lpx_line2, = ax.plot([],[], color='y', lw=3) + + plt.xlim([0, self.num_states-1]) + plt.xticks(np.linspace(0, self.num_states, 11), np.linspace(-10, 10, 11)) + plt.ylim([-1, 1]) + + anim = animation.FuncAnimation(fig, self.anim_animate, + init_func=self.anim_init, frames=10000, + interval=0, blit=True) + plt.show() + +if __name__ == '__main__': + + runner = Runner() + runner.run() + + # generate some nice plots + axes = [] + X, Y = np.meshgrid(runner.domain, runner.domain) + plt.figure(figsize=(9,9)) + for ii in range(len(runner.u)): + + # plot the starting vs ending vs actual L operators + # plot a heat map showing sensor information + axes.append(plt.subplot(4, len(runner.u), ii+1)) + plt.axis('equal') + runner.L_init[ii][0,0] = 1 + runner.L_init[ii][0,1] = -1 + plt.pcolormesh(X, Y, runner.L_init[ii]) + + axes.append(plt.subplot(4, len(runner.u), ii+1+len(runner.u))) + runner.L[ii][0,0] = 1 + runner.L[ii][0,1] = -1 + plt.axis('equal') + plt.pcolormesh(X, Y, runner.L[ii]) + + axes.append(plt.subplot(4, len(runner.u), (ii+1+2*len(runner.u)))) + plt.axis('equal') + runner.L_actual[ii][0,0] = 1 + runner.L_actual[ii][0,1] = -1 + plt.pcolormesh(X, Y, runner.L_actual[ii]) + + axes.append(plt.subplot(4, len(runner.u), (ii+1+3*len(runner.u)))) + plt.axis('equal') + diff = runner.L[ii] - runner.L_actual[ii] + diff[0,0] = 1 + diff[0,1] = -1 + plt.pcolormesh(X, Y, diff) + + axes[0].set_ylabel('Initial L operator') + axes[1].set_ylabel('Learned L operator') + axes[2].set_ylabel('Actual L operator') + axes[3].set_ylabel('Difference') + plt.suptitle('Learning L operators') + # plt.tight_layout() + plt.show() From 52d239a85bfc25187af4f00c87c5977493614869 Mon Sep 17 00:00:00 2001 From: travis dewolf Date: Fri, 20 May 2016 10:11:37 -0400 Subject: [PATCH 06/10] removed highlander mode options in riskaware --- risk_aware_control/riskaware.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/risk_aware_control/riskaware.py b/risk_aware_control/riskaware.py index 036cd22..d17ab1d 100644 --- a/risk_aware_control/riskaware.py +++ b/risk_aware_control/riskaware.py 
@@ -17,7 +17,6 @@ def __init__(self): self.var = [3, 1, .5] self.drift = 0#-.15 # systems drifts left - self.highlander_mode = False # allow more than one action at a time? # action set self.u = np.array([0, .5, -.5]) @@ -90,15 +89,6 @@ def anim_animate(self, i): # calculate the weights for the actions self.wu = np.zeros((self.num_systems, len(self.u))) - # for ii in range(len(self.u)): - # for jj in range(self.num_systems): - # # check to see if there can be only one - # if self.highlander_mode is True: - # # don't clip it here so we can tell the actual winner - # self.wu[jj,ii] = np.dot(self.v, np.dot(self.L[:, ii], self.px[jj])) - # else: - # self.wu[jj,ii] = min(1, - # max(0, np.dot(self.v.T, np.dot(self.L[jj,ii], self.px[jj])))) for ii in range(self.num_systems): for jj in range(len(self.u)): @@ -108,15 +98,6 @@ def anim_animate(self, i): # constrain so that total output power sum_j u_j**2 = 1 self.wu[ii] /= np.sqrt(np.sum(self.wu[ii]**2)) - # # select the strongest action - # if self.highlander_mode is True: - # for ii in range(self.num_systems): - # index = self.wu[ii].argmax() - # val = self.wu[ii, index] - # self.wu[ii] = np.zeros(len(self.u)) - # # now clip it - # self.wu[ii,index] = min(1, val) - # track information for plotting self.track_position.append(np.copy(self.x)) # get edges of value function From ee70e4288a34b77f6a75c2e859569379423b6b1e Mon Sep 17 00:00:00 2001 From: travis dewolf Date: Sat, 21 May 2016 11:24:37 -0400 Subject: [PATCH 07/10] removing for loops in riskaware, using matrix and einsum --- risk_aware_control/riskaware.py | 49 ++++++++++++++++----------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/risk_aware_control/riskaware.py b/risk_aware_control/riskaware.py index d17ab1d..9c0555d 100644 --- a/risk_aware_control/riskaware.py +++ b/risk_aware_control/riskaware.py @@ -6,6 +6,7 @@ class Runner: def __init__(self): + self.num_systems = 3 self.num_states = 200 self.limit = 10 self.dx = self.limit * 2.0 / self.num_states @@ -13,20 +14,19 @@ def __init__(self): self.limit, self.num_states)[:,None] - self.num_systems = 3 - self.var = [3, 1, .5] + self.var = np.array([3, 1, .5]) self.drift = 0#-.15 # systems drifts left # action set - self.u = np.array([0, .5, -.5]) + self.u = np.array([0, .1, .5, -.5]) self.L = np.zeros((len(self.var), len(self.u), self.num_states, self.num_states)) for ii in range(len(self.u)): for jj in range(self.num_states): # set the system state - self.x = np.ones(self.num_states) * self.domain[jj] + self.x = np.ones(self.num_systems) * self.domain[jj] # get the probability distribution of x old_px = self.gen_px() # apply the control signal @@ -34,7 +34,7 @@ def __init__(self): # get the new probability distribution of x px = self.gen_px() # calculate the change in the probability distribution - self.L[:, ii, :, jj] = np.copy(px - old_px) + self.L[:, ii, :, jj] = np.copy(px - old_px).T # the initial state probability self.x = np.zeros(self.num_systems) @@ -58,22 +58,18 @@ def make_v(self, mean=0): self.v += self.make_gauss(mean=mean+2,var=.6) def physics(self, u): - for ii in range(self.num_systems): - self.x[ii] += (self.drift + u[ii]) # simple physics - self.x[ii] = min(self.limit, max(-self.limit, self.x[ii])) + self.x += (self.drift + u) # simple physics + self.x = np.minimum(self.limit, np.maximum(-self.limit, self.x)) def gen_px(self, x=None, var=None): x = np.copy(self.x) if x is None else x var = self.var if var is None else var - px = np.zeros((self.num_systems, self.num_states)) - # TODO: rewrite this to 
avoid the for loop just using matrices - for ii in range(self.num_systems): - px[ii] = self.make_gauss(x[ii], var[ii]).flatten() - # make sure no negative values - px[ii][np.where(px[ii] < 0)] = 0.0 - # make sure things sum to 1 - px[ii] /= np.max(np.sum(px[ii]) * self.dx) + px = self.make_gauss(x, var) + # make sure no negative values + px[np.where(px < 0)] = 0.0 + # make sure things sum to 1 + px /= np.sum(px, axis=0) * self.dx return px @@ -89,13 +85,16 @@ def anim_animate(self, i): # calculate the weights for the actions self.wu = np.zeros((self.num_systems, len(self.u))) - + self.wu2 = np.zeros((self.num_systems, len(self.u))) + for ii in range(self.num_systems): - for jj in range(len(self.u)): - # constrain so that you can only weight actions positively - self.wu[ii,jj] = max(0, np.dot(self.v.T, np.dot(self.L[ii,jj], self.px[ii]))) + # calculate weights for all actions simultaneously, v.T * L_i * p(x) + # constrain so that you can only weight actions positively + self.wu[ii] = np.maximum(self.wu[ii], + np.einsum('lj,ij->i', self.v.T, + np.einsum('ijk,k->ij', self.L[ii], self.px[:,ii]))) + # constrain so that total output power sum_j u_j**2 = 1 if np.sum(self.wu[ii]) != 0: - # constrain so that total output power sum_j u_j**2 = 1 self.wu[ii] /= np.sqrt(np.sum(self.wu[ii]**2)) # track information for plotting @@ -115,9 +114,9 @@ def anim_animate(self, i): # move the target around slowly self.make_v(np.sin(i*.1)*5) - self.px_line0.set_data(range(self.num_states), self.px[0]) - self.px_line1.set_data(range(self.num_states), self.px[1]) - self.px_line2.set_data(range(self.num_states), self.px[2]) + self.px_line0.set_data(range(self.num_states), self.px[:,0]) + self.px_line1.set_data(range(self.num_states), self.px[:,1]) + self.px_line2.set_data(range(self.num_states), self.px[:,2]) self.v_line.set_data(range(self.num_states), self.v) return self.v_line, self.px_line0, self.px_line1, self.px_line2 @@ -132,7 +131,7 @@ def run(self): plt.xlim([0, self.num_states-1]) plt.xticks(np.linspace(0, self.num_states, 11), np.linspace(-10, 10, 11)) - plt.ylim([-1, 1]) + plt.ylim([-1, 1.5]) anim = animation.FuncAnimation(fig, self.anim_animate, init_func=self.anim_init, frames=500, From 2516461259a6db44b4b4af78712b324df4c4d4f7 Mon Sep 17 00:00:00 2001 From: travis dewolf Date: Sat, 21 May 2016 11:33:35 -0400 Subject: [PATCH 08/10] TODO:RA control not working properly anymore --- risk_aware_control/riskaware.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/risk_aware_control/riskaware.py b/risk_aware_control/riskaware.py index 9c0555d..82e834e 100644 --- a/risk_aware_control/riskaware.py +++ b/risk_aware_control/riskaware.py @@ -3,6 +3,9 @@ import matplotlib.pyplot as plt from matplotlib import animation +#TODO: RA control isn't working as it should be, as on initial commit where +# the most variance means it stays closest to the center of the road, why! + class Runner: def __init__(self): From e39ce0914fe623c2c8be9f067b96e6afc8bd2935 Mon Sep 17 00:00:00 2001 From: travis dewolf Date: Sun, 22 May 2016 15:34:08 -0400 Subject: [PATCH 09/10] Fixed - visibly working properly Turns out there was no problem, it was just really hard to see it working because of the normalization and the long tails on the Gaussians everything stayed in the center. Set the Gaussian values < .1 = 0 and now the normalization doesn't have such a ubiquitous effect on the control, can see the variance affecting each sitch. 
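As a reading aid (not part of the commit message): the fix described above amounts to zeroing the long Gaussian tails before p(x) is normalized, so the tails no longer wash out the differences between the systems' variances. A hedged sketch of that clipping step, reusing the make_gauss form from the scripts; the 0.1 threshold comes from the diff below, while the mean and variance here are illustrative:

# Sketch of the tail clipping described above; domain size, mean, and variance are illustrative.
import numpy as np

num_states = 200
limit = 10.0
dx = 2.0 * limit / num_states
domain = np.linspace(-limit, limit, num_states)

def make_gauss(mean=0.0, var=0.5):
    return np.exp(-(domain - mean) ** 2 / (2.0 * var ** 2))

px = make_gauss(mean=3.0, var=2.0)
px[np.where(px < .1)] = 0.0      # zero the long tails (previously clipped only below 0)
px /= np.sum(px) * dx            # normalize so the distribution integrates to 1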
--- risk_aware_control/riskaware.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/risk_aware_control/riskaware.py b/risk_aware_control/riskaware.py index 82e834e..f72dfa0 100644 --- a/risk_aware_control/riskaware.py +++ b/risk_aware_control/riskaware.py @@ -17,12 +17,12 @@ def __init__(self): self.limit, self.num_states)[:,None] - self.var = np.array([3, 1, .5]) + self.var = np.array([2, 1, .5]) self.drift = 0#-.15 # systems drifts left # action set - self.u = np.array([0, .1, .5, -.5]) + self.u = np.array([0, .25, -.25, .5, -.5]) self.L = np.zeros((len(self.var), len(self.u), self.num_states, self.num_states)) @@ -42,6 +42,8 @@ def __init__(self): # the initial state probability self.x = np.zeros(self.num_systems) self.px = self.gen_px() + + self.track_lane_center = [] # also need a cost function (Gaussian to move towards the center) self.make_v() @@ -56,9 +58,10 @@ def make_gauss(self, mean=0, var=.5): def make_v(self, mean=0): # set up the road self.v = self.make_gauss(mean=mean,var=2) * 5 - 1 - self.v[np.where(self.v > .5)] = .5 + self.v[np.where(self.v > .5)] = 1 # make a preference for being in the right lane self.v += self.make_gauss(mean=mean+2,var=.6) + self.track_lane_center.append(mean+2) def physics(self, u): self.x += (self.drift + u) # simple physics @@ -70,7 +73,7 @@ def gen_px(self, x=None, var=None): px = self.make_gauss(x, var) # make sure no negative values - px[np.where(px < 0)] = 0.0 + px[np.where(px < .1)] = 0.0 # make sure things sum to 1 px /= np.sum(px, axis=0) * self.dx return px @@ -217,7 +220,7 @@ def run(self): ax.set_yticklabels([]) plt.subplot(runner.num_systems+2, 1, 5) - target_center = np.sin(X[0]*.1)*5 + 2 + target_center = np.array(runner.track_lane_center)[:-1] plt.plot((runner.track_position[:,0] - target_center)**2, lw=2) plt.plot((runner.track_position[:,1] - target_center)**2, lw=2) plt.plot((runner.track_position[:,2] - target_center)**2, lw=2) From f1afae2b9493afe9e68ad29ece242ef737ad4b64 Mon Sep 17 00:00:00 2001 From: travis dewolf Date: Sun, 29 May 2016 19:17:10 -0400 Subject: [PATCH 10/10] removed seaborn dependencies --- risk_aware_control/riskaware.py | 36 +++++++++++++-------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/risk_aware_control/riskaware.py b/risk_aware_control/riskaware.py index f72dfa0..59a96c9 100644 --- a/risk_aware_control/riskaware.py +++ b/risk_aware_control/riskaware.py @@ -1,11 +1,8 @@ import numpy as np -import seaborn +# import seaborn import matplotlib.pyplot as plt from matplotlib import animation -#TODO: RA control isn't working as it should be, as on initial commit where -# the most variance means it stays closest to the center of the road, why! 
- class Runner: def __init__(self): @@ -22,7 +19,7 @@ def __init__(self): self.drift = 0#-.15 # systems drifts left # action set - self.u = np.array([0, .25, -.25, .5, -.5]) + self.u = np.array([0, 1, -1, .5, -.5, .25, -.25]) self.L = np.zeros((len(self.var), len(self.u), self.num_states, self.num_states)) @@ -58,7 +55,7 @@ def make_gauss(self, mean=0, var=.5): def make_v(self, mean=0): # set up the road self.v = self.make_gauss(mean=mean,var=2) * 5 - 1 - self.v[np.where(self.v > .5)] = 1 + self.v[np.where(self.v > .5)] = .5 # make a preference for being in the right lane self.v += self.make_gauss(mean=mean+2,var=.6) self.track_lane_center.append(mean+2) @@ -73,7 +70,7 @@ def gen_px(self, x=None, var=None): px = self.make_gauss(x, var) # make sure no negative values - px[np.where(px < .1)] = 0.0 + px[np.where(px < 0)] = 0.0 # make sure things sum to 1 px /= np.sum(px, axis=0) * self.dx return px @@ -91,12 +88,11 @@ def anim_animate(self, i): # calculate the weights for the actions self.wu = np.zeros((self.num_systems, len(self.u))) - self.wu2 = np.zeros((self.num_systems, len(self.u))) for ii in range(self.num_systems): # calculate weights for all actions simultaneously, v.T * L_i * p(x) # constrain so that you can only weight actions positively - self.wu[ii] = np.maximum(self.wu[ii], + self.wu[ii] = np.maximum(np.ones(len(self.u)), np.einsum('lj,ij->i', self.v.T, np.einsum('ijk,k->ij', self.L[ii], self.px[:,ii]))) # constrain so that total output power sum_j u_j**2 = 1 @@ -153,12 +149,9 @@ def run(self): # generate some nice plots fig = plt.figure(figsize=(8, 8)) - # do some scaling to plot the same as seaborn heat plots - fix_for_plot = lambda x: (np.array(x).squeeze() / - runner.limit / -2.0 + .5) * runner.num_states - track_target1 = fix_for_plot(runner.track_target1) - track_target2 = fix_for_plot(runner.track_target2) - track_position = fix_for_plot(runner.track_position) + track_target1 = np.array(runner.track_target1).squeeze() + track_target2 = np.array(runner.track_target2).squeeze() + track_position = np.array(runner.track_position).squeeze() runner.track_position = np.array(runner.track_position) time = track_position.shape[0] @@ -170,10 +163,10 @@ def run(self): plt.title('Position on road') plt.subplot(runner.num_systems+2, 1, 4) - plt.fill_between(range(track_target2.shape[0]), - track_target2[:,0], track_target2[:,1], facecolor='orange', alpha=.25) plt.fill_between(range(track_target1.shape[0]), - track_target1[:,0], track_target1[:,1], facecolor='y', alpha=.25) + track_target1[:,0], track_target1[:,1], facecolor='y', alpha=.15) + plt.fill_between(range(track_target2.shape[0]), + track_target2[:,0], track_target2[:,1], facecolor='orange', alpha=.45) for ii in range(runner.num_systems): @@ -188,17 +181,16 @@ def run(self): heatmap[:,jj] = runner.make_gauss( mean=runner.track_position[jj, ii], var=runner.var[ii]).flatten() - seaborn.heatmap(heatmap, xticklabels=False, yticklabels=False, - cbar=False, cmap='Blues') + plt.pcolormesh(X, Y, heatmap, cmap='Blues') # plot filled in zones of desirability, first the road plt.fill_between(range(track_target1.shape[0]), track_target1[:,0], track_target1[:,1], - facecolor='y', alpha=.25) + facecolor='y', alpha=.15) # and now the lane plt.fill_between(range(track_target2.shape[0]), track_target2[:,0], track_target2[:,1], - facecolor='orange', alpha=.25) + facecolor='orange', alpha=.45) # plot actual position of each system line, = plt.plot(track_position[:,ii], 'k', lw=3)
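
For reference, a minimal sketch of the vectorized action weighting these patches converge on: for each action j, w_j = max(0, v . (L_j p(x))), then rescaled so the squared weights sum to 1. The shapes and the random v, L, and p(x) below are placeholders, and the repo's exact einsum strings and clipping differ slightly; this only shows that the loop and einsum forms agree, which is the point of the for-loop removal:

    import numpy as np

    # Illustrative shapes and random placeholders, not the committed values.
    num_states, num_actions = 200, 5
    v = np.random.random(num_states)             # value function over states
    L = np.random.random((num_actions, num_states, num_states)) * 2 - 1
    px = np.random.random(num_states)
    px /= np.sum(px)                             # state probability distribution

    # loop form from the earlier commits: w_j = max(0, v . (L_j p(x)))
    w_loop = np.array([max(0.0, v @ (L[j] @ px)) for j in range(num_actions)])

    # einsum form: apply every L_j to p(x) at once, then dot with v
    Lpx = np.einsum('jkl,l->jk', L, px)          # shape (num_actions, num_states)
    w_ein = np.maximum(0.0, np.einsum('k,jk->j', v, Lpx))

    assert np.allclose(w_loop, w_ein)

    # rescale so the squared weights sum to 1, guarding the all-zero case
    if np.any(w_ein):
        w_ein /= np.sqrt(np.sum(w_ein**2))
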
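
And a small self-contained sketch of the seaborn-free heatmap plotting the last patch switches to, using only matplotlib's pcolormesh; the trajectory and variance here are made up for illustration:

    import numpy as np
    import matplotlib.pyplot as plt

    # Made-up trajectory and variance, just to show the matplotlib-only heatmap.
    num_states, timesteps = 200, 100
    domain = np.linspace(-10, 10, num_states)
    positions = 5 * np.sin(np.arange(timesteps) * .1)   # stand-in trajectory
    var = 1.0

    # Gaussian sensory estimate over the domain at each timestep
    heatmap = np.zeros((num_states, timesteps))
    for t in range(timesteps):
        heatmap[:, t] = np.exp(-(domain - positions[t])**2 / (2 * var**2))

    # pcolormesh takes the place of seaborn.heatmap
    X, Y = np.meshgrid(np.arange(timesteps), domain)
    plt.pcolormesh(X, Y, heatmap, cmap='Blues')
    plt.plot(positions, 'k', lw=3)                      # position over the heatmap
    plt.xlim([0, timesteps - 1])
    plt.show()
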