diff --git a/contracts.py b/contracts.py
new file mode 100644
index 0000000..fd2fdbe
--- /dev/null
+++ b/contracts.py
@@ -0,0 +1,5 @@
+def requires(booleanStatement):
+    assert(booleanStatement);
+
+def ensures(booleanStatement):
+    assert(booleanStatement);
diff --git a/envs/monitorEncorporated_env.py b/envs/monitorEncorporated_env.py
new file mode 100644
index 0000000..8d0b15e
--- /dev/null
+++ b/envs/monitorEncorporated_env.py
@@ -0,0 +1,281 @@
+from aa_simulation.contracts import *;
+from rllab.envs.proxy_env import ProxyEnv;
+from aa_simulation.envs.base_env import VehicleEnv;
+from rllab.policies.base import Policy;
+import numpy as np;
+
+from rllab.core.serializable import Serializable;
+from rllab.spaces import Box;
+
+from rllab.envs.base import Step;
+
+def isProperMonitorSubFormula(thisProposedSubFormula):
+    return isinstance(thisProposedSubFormula, str);
+
+# The triple-quoted block below is the original implementation of
+# isProperMonitorSubFormula, kept as a record after running into
+# substantial problems pickling functions between components.
+"""
+    if(str(type(thisProposedSubFormula)) != "<class 'function'>"):
+        return False;
+    if(thisProposedSubFormula.__code__.co_argcount != 3):
+        return False;
+    return True;
+"""
+
+
+class MonitorEncorporatedEnv(ProxyEnv):
+    """
+    MonitorEncorporatedEnv: this class provides a way to transform an instance of the
+    VehicleEnv class (the "wrapped environment") into an environment where the monitor is used
+    in a variety of ways. The monitor information is provided as a list of the quantitative
+    subformulas - for instance, if the monitor is ((A < B) AND (C < D) AND (E < F)), then the
+    monitor is provided as [B - A, D - C, F - E]. The monitor information can be used in any
+    subset of the following:
+    (1) Activate fallback controller: in case the monitor is violated, a fallback controller
+        is used to dictate the actions to be taken, as opposed to the agent interacting with
+        the environment. This occurs in all and only the situations where the monitor is
+        violated. To disable this functionality, simply pass in None for codeForFallbackController;
+        the actions provided to the environment through the step function (see the code below)
+        will then always be acted out.
+    (2) Influence the reward returned: the reward returned by the environment is a weighted
+        combination of the reward given by the wrapped environment and the value given by the
+        quantitative monitor. Specifically, the reward given is:
+            reward = rewardFromWrappedEnvironment + \
+                weightForQuantMonitorValueInReward * min(B - A, D - C, F - E)
+        To effectively disable this functionality, set weightForQuantMonitorValueInReward to 0.0.
+    (3) Additional features in observations: in addition to the features provided by the
+        wrapped environment, the quantitative-monitor subformulas can be provided as additional
+        features for a state. For instance, if the initial feature vector is:
+            [f_1, f_2, ..., f_{n-1}, f_n]
+        the features can be expanded to include:
+            [f_1, f_2, ..., f_{n-1}, f_n, B - A, D - C, F - E]
+        Note that since we use the quantitative monitor subformulas, the features vary over the
+        set of the state space where the monitor is not violated.
+        This is in contrast to using the subformulas of the original (boolean) monitor, in
+        which case the binary values would not vary over the safe set - the moment any one of
+        them changes, the environment would trigger the fallback controller to take over the
+        vehicle, which makes having those features in such an arrangement of little utility.
+        To enable these additional features, set useQuantMonitorSubformulasAsFeatures to True,
+        and to disable, set useQuantMonitorSubformulasAsFeatures to False.
+    Again, any subset of the above three options is valid - so there are at least 8 general
+    modes of operation for this class.
+
+
+    A Note on Some Unfortunate Hacks Made To Get The rllab Infrastructure to Work With This Code:
+    Unfortunately, various parts of the rllab code try to do clever things with pickling when
+    saving results and passing parameters around in the infrastructure. This limits how readily
+    plain functions can be passed around as parameters - while cloudpickle can be substituted
+    for pickle in many places in the rllab code, at least three challenges remain: (1) rllab uses
+    some functionality of pickle not supported by cloudpickle (specific attributes pickle has that
+    cloudpickle does not), (2) rllab is a project outside the general control of the aa-group, and
+    its code base has been frozen for some time in favor of developing a new platform; as such,
+    we would have to modify our own local copy of rllab and distribute it to anyone in the
+    aa-group who wants to use it, (3) in addition to the python package "pickle", rllab also
+    takes advantage of numpy pickle functions that apparently have some similar issues.
+
+    As a work-around to the difficulties listed above, the code was changed to use source-code
+    strings in place of python function objects. That is, instead of passing in, say,
+        lambda x: x + 2
+    the code requires that the string
+        "x + 2"
+    be passed in. Specifically, the elements of quantitativeMonitorSubFormulas must be strings
+    that can be evaluated by the python built-in eval, and codeForFallbackController must be text
+    executable by the python built-in exec that defines the function fallbackController.
+    Plans for near-future development include investigating better ways to handle these
+    circumstances. For a first swing at developing these functionalities, this arrangement should
+    be sufficient without being overly brittle or overly complex.
+    """
+
+    def __init__(self, wrapped_env, quantitativeMonitorSubFormulas, \
+        weightForQuantMonitorValueInReward, codeForFallbackController, useQuantMonitorSubformulasAsFeatures):
+        requires(isinstance(wrapped_env, VehicleEnv));
+        requires(isinstance(quantitativeMonitorSubFormulas, list));
+        # NOTE: we allow quantitativeMonitorSubFormulas to be an empty list,
+        # in which case no monitor violations should ever occur.
+        requires(all([isProperMonitorSubFormula(x) for x in quantitativeMonitorSubFormulas]));
+        requires(isinstance(weightForQuantMonitorValueInReward, float));
+        # NOTE: we allow weightForQuantMonitorValueInReward to be negative, in which
+        # case the agent would be rewarded for violating the monitor condition.
+        # This might be useful for testing, or to empirically judge the
+        # influence of the monitor signal incorporated via the reward function.
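+        # For example (illustrative values only, mirroring the training scripts below rather
+        # than prescribing anything): for a monitor requiring state[0] to stay within 0.05 of
+        # its target, a caller might pass
+        #     quantitativeMonitorSubFormulas = ["0.05 - state[0]"],
+        #     weightForQuantMonitorValueInReward = 0.5,
+        #     codeForFallbackController = "def fallbackController(observation): ...", and
+        #     useQuantMonitorSubformulasAsFeatures = True.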
+        requires(codeForFallbackController == None or isinstance(codeForFallbackController, str));
+        requires(isinstance(useQuantMonitorSubformulasAsFeatures, bool));
+
+        # NOTE: we cannot do
+        #     ProxyEnv.__init__(self, wrapped_env);
+        # or
+        #     super(MonitorEncorporatedEnv, self).__init__(wrapped_env);
+        # since the init function here (unlike the one in the ProxyEnv class) takes in multiple
+        # arguments, which results in locals() not capturing all of them if we try calling
+        # as listed above.
+        Serializable.quick_init(self, locals())
+        self._wrapped_env = wrapped_env
+
+        # TODO: consider adding pythonic leading underscores to these attributes as necessary...
+        self.quantitativeMonitorSubFormulas = quantitativeMonitorSubFormulas;
+        self.weightForQuantMonitorValueInReward = weightForQuantMonitorValueInReward;
+        if(codeForFallbackController != None):
+            exec(codeForFallbackController);
+            self.fallbackController = locals()["fallbackController"];
+        else:
+            self.fallbackController = None;
+
+        self.useQuantMonitorSubformulasAsFeatures = useQuantMonitorSubformulasAsFeatures;
+
+        # TODO: Select better-informed values of self._action for the period prior to
+        # when the controller makes its first decision. Grep over this file to see where
+        # self._action is used and why its value prior to the first choice might have
+        # some impact.
+        self._action = np.array([0,0]); # Note that this is the actual action performed on the
+            # environment, not necessarily the same as self._wrapped_env.action -
+            # in the case of a monitor violation, and if a fallback controller is
+            # specified, the action is dictated by the fallback controller, not
+            # the initial policy.
+
+        # NOTE: Setting the _state variable below is important for calculating
+        # the acceleration fed into the quantitative-monitor subformulas. See
+        # the function evaluate_quantitativeMonitorSubFormulas.
+        self._state = np.zeros(self.observation_space.flat_dim,)
+
+        return;
+
+
+    def getAxiluraryInformation(self, state):
+        fakeTime = 0.0; # the time is not actually used in the dynamics in question...
+        # Note that below we use self._action, not self._wrapped_env.action, since we want the
+        # actual action performed, not the one the wrapped controller would have chosen...
+        state_dot = self._wrapped_env._model._dynamics(state, fakeTime, self._action);
+        """
+        recall:
+            state_dot[0] = pos_x_dot
+            state_dot[1] = pos_y_dot
+            state_dot[2] = yaw_rate
+            state_dot[3] = v_x_dot
+            state_dot[4] = v_y_dot
+            state_dot[5] = yaw_rate_dot
+        """
+        return state_dot[3:6]; # returning the accelerations.
+
+
+    def evaluate_quantitativeMonitorSubFormulas(self, state, action):
+        axiluraryInformation = self.getAxiluraryInformation(state)
+        listToReturn = [\
+            eval(x, {"state" : state, "action" : action, "axiluraryInformation" : axiluraryInformation, "np" : np}) \
+            for x in self.quantitativeMonitorSubFormulas];
+        ensures(isinstance(listToReturn, list));
+        ensures(len(listToReturn) == len(self.quantitativeMonitorSubFormulas));
+        return listToReturn;
+
+    def getMin_evaluate_quantitativeMonitorSubFormulas(self, state, action):
+        # This function handles the edge case where self.quantitativeMonitorSubFormulas is an
+        # empty list - it helps avoid silly errors that might result from the more
+        # straightforward use of min(self.evaluate_quantitativeMonitorSubFormulas(state, action))
+        # at various locations.
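+        # For example (illustrative numbers only): with the single subformula
+        # "0.05 - state[0]" and state[0] == 0.10, this returns -0.05, which counts as a
+        # monitor violation; with state[0] == 0.01 it returns 0.04, which does not.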
+        if(len(self.quantitativeMonitorSubFormulas) == 0):
+            return 0.0; # NOTE: we consider the monitor to be violated when the value from
+                # the quantitative monitor is negative, so returning zero does not constitute
+                # a monitor violation.
+        else:
+            return min(self.evaluate_quantitativeMonitorSubFormulas(state, action));
+        raise Exception("Control should never reach here");
+        return;
+
+
+    def reset(self):
+        """
+        Reset environment back to original state.
+        """
+        self._action = np.array([0,0]);
+        self._wrapped_env._state = self._action;
+        self._state = self._wrapped_env.get_initial_state
+        self._wrapped_env._state = self._state;
+        observation = self.state_to_observation(self._state)
+
+        # Reset renderer if available
+        if self._wrapped_env._renderer is not None:
+            self._wrapped_env._renderer.reset()
+
+        return observation
+
+
+    def helper_step(self, action):
+        """
+        Move one iteration forward in simulation.
+        """
+        if action[0] < 0:  # Only allow forward direction
+            action[0] = 0
+        nextstate = self._wrapped_env._model.state_transition(self._state, action,
+                self._wrapped_env._dt)
+        self._state = nextstate
+        # Notice that below we use the state_to_observation and get_reward functions defined
+        # in this class, as opposed to the ones defined in self._wrapped_env - hence the need
+        # to reimplement this function (helper_step) as opposed to simply calling
+        # self._wrapped_env.step .
+        reward, info = self.get_reward(nextstate, action)
+        observation = self.state_to_observation(nextstate)
+        return Step(observation=observation, reward=reward, done=False,
+                dist=info['dist'], vel=info['vel'], kappa=self._wrapped_env._model.kappa)
+
+
+    def step(self, action):
+        if(self.fallbackController != None):
+            monitorHasBeenViolated = (\
+                self.getMin_evaluate_quantitativeMonitorSubFormulas(self._wrapped_env._state, action) < 0.0);
+            if(monitorHasBeenViolated):
+                action = self.fallbackController(self._wrapped_env._state);
+        self._action = action;
+        # TODO: consider whether we should also set self._wrapped_env._action, or make
+        # the option of whether or not to do that a variable passed in to the
+        # init function of this class.
+        return self.helper_step(action);
+
+
+    def get_reward(self, state, action):
+        reward, info = self._wrapped_env.get_reward(state, action);
+        if(self.weightForQuantMonitorValueInReward != 0.0): # this conditional prevents
+                # unnecessary computation, but is not strictly needed.
+            minimumQuantMonitorValue = self.getMin_evaluate_quantitativeMonitorSubFormulas(state, action);
+            reward = reward + self.weightForQuantMonitorValueInReward * minimumQuantMonitorValue;
+        return reward, info;
+
+
+    def state_to_observation(self, state):
+        originalObs = self._wrapped_env.state_to_observation(state);
+        if(self.useQuantMonitorSubformulasAsFeatures):
+            quantMonitorInput = np.array(self.evaluate_quantitativeMonitorSubFormulas(state, self._action));
+            originalObs = np.concatenate([originalObs, quantMonitorInput]);
+        return originalObs;
+
+
+    @property
+    def observation_space(self):
+        """
+        Define the shape of the input vector to the neural network.
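+        When useQuantMonitorSubformulasAsFeatures is True, the dimension is the wrapped
+        environment's observation dimension plus len(self.quantitativeMonitorSubFormulas).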
+ """ + dimensionOfOriginalObservationSpace = self._wrapped_env.observation_space.flat_dim; + if(not self.useQuantMonitorSubformulasAsFeatures): + return Box(low=-np.inf, high=np.inf, shape=(\ + dimensionOfOriginalObservationSpace,)); + else: + return Box(low=-np.inf, high=np.inf, shape=(\ + dimensionOfOriginalObservationSpace +len(self.quantitativeMonitorSubFormulas))); + raise Exception("Control should never reach here"); + return; + + + @property + def get_initial_state(self): + state = self._wrapped_env.get_initial_state; + # NOTE: Setting the two state variables below are important for calculating + # the acceleration fed into the quantitative-monitor subformulas. See + # the function evaluate_quantitativeMonitorSubFormulas + self._state = state; + self._wrapped_env._state = state; + return state + + + def get_action(observation): + return self._wrapped_env.get_action(observation); + + diff --git a/test/test_monitorEncorporated_straight.py b/test/test_monitorEncorporated_straight.py new file mode 100644 index 0000000..6c33610 --- /dev/null +++ b/test/test_monitorEncorporated_straight.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +@author: DBayani +largely based on test_monitorEncorporated_straight.py by edwardahn + +Demonstrations of testing with MonitorEncorporatedEnv +environment wrapped around the straight vehicle environment. +""" + +import argparse +import sys + +import joblib +import matplotlib.pyplot as plt +import numpy as np + +from rllab.misc import tensor_utils + +from aa_simulation.envs.monitorEncorporated_env import MonitorEncorporatedEnv; +from aa_simulation.envs.straight.straight_env import StraightEnv; +from aa_simulation.misc.utils import normalize_angle + + +def rollout(env, agent, line_params, max_path_length=np.inf, + animated=False): + """ + Modified rollout function from rllab.sampler.utils to run + arbitrary straight trajectories. + """ + observations = [] + rewards = [] + actions = [] + agent_infos = [] + env_infos = [] + + projected_trajectory = [] + x0, y0, angle = line_params + env.reset() + agent.reset() + + # Force start state to be zeros + # Note: Because env is an instance of NormalizedEnv, there is no + # way of writing a custom function that I can use to set the + # initial state. Consequently we just force set it here. 
+ start_yaw = angle + start_state = np.array([x0, y0, start_yaw, 0, 0, 0]) + env._wrapped_env._state = start_state + o = np.zeros(5) + + path_length = 0 + if animated: + env.render() + print('--------------------') + while path_length < max_path_length: + print('') + state = env._wrapped_env._state + print('State = ', state) + projected_o = StraightEnv.project_line(state, x0, y0, angle) + print('Projected state = ', projected_o) + _, agent_info = agent.get_action(env.state_to_observation(projected_o)); + a = agent_info['mean'] + print('Computed action = ', a) + next_o, r, d, env_info = env.step(a) + print('Next observation = ', next_o) + observations.append(env.observation_space.flatten(o)) + rewards.append(r) + actions.append(env.action_space.flatten(a)) + agent_infos.append(agent_info) + env_infos.append(env_info) + projected_trajectory.append(projected_o) + path_length += 1 + if d: + break + o = next_o + if animated: + env.render() + print('--------------------') + + return dict( + observations=tensor_utils.stack_tensor_list(observations), + actions=tensor_utils.stack_tensor_list(actions), + rewards=tensor_utils.stack_tensor_list(rewards), + agent_infos=tensor_utils.stack_tensor_dict_list(agent_infos), + env_infos=tensor_utils.stack_tensor_dict_list(env_infos), + ), projected_trajectory + + +def plot_trajectories(trajectory1, trajectory2): + """ + Plot trajectory of unprojected path and projected path. + """ + y1 = trajectory1[:,0] + y2 = trajectory2[:,0] + t = np.arange(len(y1)) + + diff = abs(y2 - y1) + max_diff = max(diff) + mean_diff = np.mean(diff) + print('\nMaximum absolute difference =\t', max_diff) + print('Mean absolute difference =\t', mean_diff) + + plt.figure() + plt.title('Trajectories: Relative y-values') + plt.xlabel('Time step') + plt.ylabel('y (m)') + plt.plot(t, y1, 'b', t, y2, 'r') + plt.legend(['Unprojected', 'Projected']) + plt.show() + + +def parse_arguments(): + parser = argparse.ArgumentParser() + parser.add_argument('file', type=str, + help='path to the snapshot file') + parser.add_argument('--max_path_length', type=int, default=100, + help='Max length of rollout') + parser.add_argument('--render', dest='render', + action='store_true', help='Rendering') + parser.add_argument('--no-render', dest='render', + action='store_false', help='Rendering') + parser.set_defaults(render=False) + args = parser.parse_args() + return args + + +def main(): + args = parse_arguments() + data = joblib.load(args.file) + policy = data['policy'] + env = data['env'] + plt.ion() + + + #np.set_printoptions(precision=4, suppress=True) + + + # Set fixed random seed + np.random.seed(9) + + # Sample rollouts with different projections (change line_params2) + line_params1 = np.array([0, 0, 0]) + line_params2 = np.array([3, 0, np.pi/2]) + path1, projected_states1 = rollout(env, policy, line_params1, + max_path_length=args.max_path_length, animated=args.render) + path2, projected_states2 = rollout(env, policy, line_params2, + max_path_length=args.max_path_length, animated=args.render) + + # Plot projected trajectories on graph + projected_states1 = np.array(projected_states1) + projected_states2 = np.array(projected_states2) + plot_trajectories(projected_states1, projected_states2) + + # Block until key is pressed + sys.stdout.write("Press to continue: ") + input() + + +# if __name__ == "__main__": +main() diff --git a/train/train_monitorEncorporated_circle_planner.py b/train/train_monitorEncorporated_circle_planner.py new file mode 100644 index 0000000..2a8f95d --- /dev/null +++ 
b/train/train_monitorEncorporated_circle_planner.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +@author: DBayani +largely based on train_straight_planner.py by edwardahn + +Demonstrations of training with MonitorEncorporatedEnv +environment wrapped around the straight vehicle environment. +""" + +import argparse + +import joblib +import lasagne.init as LI +import lasagne.layers as L +import lasagne.nonlinearities as LN +import numpy as np + +from rllab.algos.trpo import TRPO +from rllab.core.lasagne_layers import ParamLayer +from rllab.core.lasagne_powered import LasagnePowered +from rllab.core.network import MLP +# from rllab.envs.base import Env +from rllab.misc import ext, logger +from rllab.misc.instrument import run_experiment_lite, VariantGenerator +from rllab.misc.resolve import load_class +from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy +from sandbox.cpo.baselines.linear_feature_baseline import LinearFeatureBaseline + +from aa_simulation.envs.circle.circle_env import CircleEnv +from aa_simulation.envs.monitorEncorporated_env import MonitorEncorporatedEnv; + +# Pre-trained policy and baseline +policy = None +baseline = None + + + +from rllab.policies.base import Policy; + + +# We keep in the below dead-code as a reminder that the ideal way the +# fallback-controller would be specified is with an instance of the policy class. +""" +class DummbyFallbackPolicy(Policy): + def get_action(self, observation): + return np.array([0,0]); +""" + + + +def run_task(vv, log_dir=None, exp_name=None): + global policy + global baseline + + trpo_stepsize = 0.01 + trpo_subsample_factor = 0.2 + + # Check if variant is available + if vv['model_type'] not in ['BrushTireModel', 'LinearTireModel']: + raise ValueError('Unrecognized model type for simulating robot') + if vv['robot_type'] not in ['MRZR', 'RCCar']: + raise ValueError('Unrecognized robot type') + + + # Load environment + baseEnv = CircleEnv( + target_velocity=vv['target_velocity'], + radius=vv['radius'], + dt=vv['dt'], + model_type=vv['model_type'], + robot_type=vv['robot_type'] + ) + env = MonitorEncorporatedEnv(\ + wrapped_env = baseEnv, \ + quantitativeMonitorSubFormulas = vv["quantitativeMonitorSubFormulas"], \ + weightForQuantMonitorValueInReward = vv["weightForQuantMonitorValueInReward"], \ + codeForFallbackController = vv["codeForFallbackController"], \ + useQuantMonitorSubformulasAsFeatures = vv["useQuantMonitorSubformulasAsFeatures"]\ + ); + + # Save variant information for comparison plots + variant_file = logger.get_snapshot_dir() + '/variant.json' + logger.log_variant(variant_file, vv) + + # Set variance for each action component separately for exploration + # Note: We set the variance manually because we are not scaling our + # action space during training. + init_std_speed = vv['target_velocity'] / 4 + init_std_steer = np.pi / 6 + init_std = [init_std_speed, init_std_steer] + + # Build policy and baseline networks + # Note: Mean of policy network set to analytically computed values for + # faster training (rough estimates for RL to fine-tune). + if policy is None or baseline is None: + wheelbase = 0.257 + target_velocity = vv['target_velocity'] + target_steering = np.arctan(wheelbase / vv['radius']) # CCW + output_mean = np.array([target_velocity, target_steering]) + hidden_sizes = (32, 32) + + # In mean network, allow output b values to dominate final output + # value by constraining the magnitude of the output W matrix. This is + # to allow faster learning. 
These numbers are arbitrarily chosen. + W_gain = min(vv['target_velocity'] / 5, np.pi / 15) + + mean_network = MLP( + input_shape=(env.spec.observation_space.flat_dim,), + output_dim=env.spec.action_space.flat_dim, + hidden_sizes=hidden_sizes, + hidden_nonlinearity=LN.tanh, + output_nonlinearity=None, + output_W_init=LI.GlorotUniform(gain=W_gain), + output_b_init=output_mean + ) + policy = GaussianMLPPolicy( + env_spec=env.spec, + hidden_sizes=hidden_sizes, + init_std=init_std, + mean_network=mean_network + ) + baseline = LinearFeatureBaseline( + env_spec=env.spec, + target_key='returns' + ) + + # Reset variance to re-enable exploration when using pre-trained networks + else: + policy._l_log_std = ParamLayer( + policy._mean_network.input_layer, + num_units=env.spec.action_space.flat_dim, + param=LI.Constant(np.log(init_std)), + name='output_log_std', + trainable=True + ) + obs_var = policy._mean_network.input_layer.input_var + mean_var, log_std_var = L.get_output([policy._l_mean, policy._l_log_std]) + policy._log_std_var = log_std_var + LasagnePowered.__init__(policy, [policy._l_mean, policy._l_log_std]) + policy._f_dist = ext.compile_function( + inputs=[obs_var], + outputs=[mean_var, log_std_var] + ) + + algo = TRPO( + env=env, + policy=policy, + baseline=baseline, + batch_size=600, + max_path_length=env.horizon, + n_itr=600, + discount=0.99, + step_size=trpo_stepsize, + plot=False, + ) + algo.train() + + +def parse_arguments(): + parser = argparse.ArgumentParser() + parser.add_argument('--network', type=str, + help='Path to snapshot file of pre-trained network') + args = parser.parse_args() + return args + + +def main(): + global policy + global baseline + + # Load pre-trained network if available + args = parse_arguments() + if args.network is not None: + data = joblib.load(args.network) + policy = data['policy'] + baseline = data['baseline'] + use_pretrained = True + else: + use_pretrained = False + + # Run multiple experiment variants at once + vg = VariantGenerator() + + # Non-configurable parameters (do not change) + vg.add('trajectory', ['Circle']) + vg.add('objective', ['TargetVelocity']) + vg.add('algo', ['TRPO']) + + + targetVelocity = 1.0 + epsilonValue = 0.05; + + # Configurable parameters + # Options for model_type: 'BrushTireModel', 'LinearTireModel' + # Options for robot_type: 'MRZR', 'RCCar' + seeds = [100, 200] + robot_type = 'RCCar' + use_ros = False + vg.add('seed', seeds) + vg.add('target_velocity', [targetVelocity]) + vg.add('radius', [1.0]) + vg.add('dt', [0.1]) + vg.add('eps', [epsilonValue]) + vg.add('model_type', ['BrushTireModel']) + vg.add('robot_type', [robot_type]) + # We are mostly uninterested in the negative values below, since they would encourage + # violating the monitor.... + vg.add('weightForQuantMonitorValueInReward', [0.0, 2.0, 0.125, -0.125, 1.0, 0.25, 0.5, -2.0]); + vg.add('useQuantMonitorSubformulasAsFeatures', [True, False]); + + quantMonitorSubformula_carDrivingAwayFromLine = str(epsilonValue) + "- state[0]"; # I.e., if more than + # epsilon away from the target raduis, then the quantitative monitor is violated. + quantitativeMonitorSubFormulas = \ + [quantMonitorSubformula_carDrivingAwayFromLine]; + + codeForFallbackController = """ +import numpy as np; +def fallbackController(observation): + deltaRaduis = observation[0] + + amountToSteerOffCenter = 0.1; # very slight to avoid oversteering... but completely + # fails to consider drifting. 
As such, this is VERY MUCH a first-swing attempt + # with a fallback controller that we would not actually use. + velocityToUse = """ + str(targetVelocity) + """; + + # steers toward the line + steeringAngle = amountToSteerOffCenter * np.sign(deltaRaduis); # If we are outside the circle, we + # stear toward the inside, if we are on the inside, we steer toward the outside.... + return np.array([steeringAngle, velocityToUse]); +"""; + + vg.add("codeForFallbackController", [codeForFallbackController, None]); + vg.add("quantitativeMonitorSubFormulas", [quantitativeMonitorSubFormulas, []]); + + print('Number of Configurations: ', len(vg.variants())) + + # Run each experiment variant + indexOfStartVariant = 126; + for vv in vg.variants(): + indexOfStartVariant = indexOfStartVariant - 1; + print("indexOfStartVariant:" + str(indexOfStartVariant), flush=True); + if(indexOfStartVariant > 0): + continue; + run_experiment_lite( + stub_method_call=run_task, + variant=vv, + n_parallel=1, + snapshot_mode='last', + seed=vv['seed'] + ) + + +if __name__ == '__main__': + main() + + + + + + diff --git a/train/train_monitorEncorporated_straight_planner.py b/train/train_monitorEncorporated_straight_planner.py new file mode 100644 index 0000000..c7d1c47 --- /dev/null +++ b/train/train_monitorEncorporated_straight_planner.py @@ -0,0 +1,244 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +@author: DBayani +largely based on train_straight_planner.py by edwardahn + +Demonstrations of training with MonitorEncorporatedEnv +environment wrapped around the straight vehicle environment. +""" + +import argparse + +import joblib +import lasagne.init as LI +import lasagne.layers as L +import lasagne.nonlinearities as LN +import numpy as np + +from rllab.algos.trpo import TRPO +from rllab.core.lasagne_layers import ParamLayer +from rllab.core.lasagne_powered import LasagnePowered +from rllab.core.network import MLP +# from rllab.envs.base import Env +from rllab.misc import ext, logger +from rllab.misc.instrument import run_experiment_lite, VariantGenerator +from rllab.misc.resolve import load_class +from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy +from sandbox.cpo.baselines.linear_feature_baseline import LinearFeatureBaseline + +from aa_simulation.envs.straight.straight_env import StraightEnv +from aa_simulation.envs.monitorEncorporated_env import MonitorEncorporatedEnv; + +# Pre-trained policy and baseline +policy = None +baseline = None + + + +from rllab.policies.base import Policy; + + +# We keep in the below dead-code as a reminder that the ideal way the +# fallback-controller would be specified is with an instance of the policy class. 
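+# (That approach runs into the same pickling limitations described in the
+# MonitorEncorporatedEnv docstring, hence the code-string workaround used below.)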
+""" +class DummbyFallbackPolicy(Policy): + def get_action(self, observation): + return np.array([0,0]); +""" + + + +def run_task(vv, log_dir=None, exp_name=None): + global policy + global baseline + + # Check if variant is available + if vv['model_type'] not in ['BrushTireModel', 'LinearTireModel']: + raise ValueError('Unrecognized model type for simulating robot') + if vv['robot_type'] not in ['MRZR', 'RCCar']: + raise ValueError('Unrecognized robot type') + + + # Load environment + baseEnv = StraightEnv( + target_velocity=vv['target_velocity'], + dt=vv['dt'], + model_type=vv['model_type'], + robot_type=vv['robot_type'] + ); + env = MonitorEncorporatedEnv(\ + wrapped_env = baseEnv, \ + quantitativeMonitorSubFormulas = vv["quantitativeMonitorSubFormulas"], \ + weightForQuantMonitorValueInReward = vv["weightForQuantMonitorValueInReward"], \ + codeForFallbackController = vv["codeForFallbackController"], \ + useQuantMonitorSubformulasAsFeatures = vv["useQuantMonitorSubformulasAsFeatures"]\ + ); + + # Save variant information for comparison plots + variant_file = logger.get_snapshot_dir() + '/variant.json' + logger.log_variant(variant_file, vv) + + # Set variance for each action component separately for exploration + # Note: We set the variance manually because we are not scaling our + # action space during training. + init_std_speed = vv['target_velocity'] / 4 + init_std_steer = np.pi / 6 + init_std = [init_std_speed, init_std_steer] + + # Build policy and baseline networks + # Note: Mean of policy network set to analytically computed values for + # faster training (rough estimates for RL to fine-tune). + if policy is None or baseline is None: + target_velocity = vv['target_velocity'] + target_steering = 0 + output_mean = np.array([target_velocity, target_steering]) + hidden_sizes = (32, 32) + + # In mean network, allow output b values to dominate final output + # value by constraining the magnitude of the output W matrix. This is + # to allow faster learning. These numbers are arbitrarily chosen. + W_gain = min(vv['target_velocity'] / 5, np.pi / 15) + + + # Thankfully, the use of env.spec.observation_space.flat_dim should take care of + # having to specify the input dimension... 
+ mean_network = MLP( + input_shape=(env.spec.observation_space.flat_dim,), + output_dim=env.spec.action_space.flat_dim, + hidden_sizes=hidden_sizes, + hidden_nonlinearity=LN.tanh, + output_nonlinearity=None, + output_W_init=LI.GlorotUniform(gain=W_gain), + output_b_init=output_mean + ) + policy = GaussianMLPPolicy( + env_spec=env.spec, + hidden_sizes=(32, 32), + init_std=init_std, + mean_network=mean_network + ) + baseline = LinearFeatureBaseline(env_spec=env.spec) + + # Reset variance to re-enable exploration when using pre-trained networks + else: + policy._l_log_std = ParamLayer( + policy._mean_network.input_layer, + num_units=env.spec.action_space.flat_dim, + param=LI.Constant(np.log(init_std)), + name='output_log_std', + trainable=True + ) + obs_var = policy._mean_network.input_layer.input_var + mean_var, log_std_var = L.get_output([policy._l_mean, policy._l_log_std]) + policy._log_std_var = log_std_var + LasagnePowered.__init__(policy, [policy._l_mean, policy._l_log_std]) + policy._f_dist = ext.compile_function( + inputs=[obs_var], + outputs=[mean_var, log_std_var] + ) + + algo = TRPO( + env=env, + policy=policy, + baseline=baseline, + batch_size=600, + max_path_length=env.horizon, + n_itr=600, + discount=0.99, + step_size=0.01, + plot=False, + ) + algo.train() + + +def parse_arguments(): + parser = argparse.ArgumentParser() + parser.add_argument('--network', type=str, + help='Path to snapshot file of pre-trained network') + args = parser.parse_args() + return args + + +def main(): + global policy + global baseline + + # Load pre-trained network if available + args = parse_arguments() + if args.network is not None: + data = joblib.load(args.network) + policy = data['policy'] + baseline = data['baseline'] + use_pretrained = True + else: + use_pretrained = False + + # Run multiple experiment variants at once + vg = VariantGenerator() + + # Non-configurable parameters (do not change) + vg.add('trajectory', ['Straight']) + vg.add('objective', ['TargetVelocity']) + vg.add('algo', ['TRPO']) + + + targetVelocity = 1.0 + + # Configurable parameters + # Options for model_type: 'BrushTireModel', 'LinearTireModel' + # Options for robot_type: 'MRZR', 'RCCar' + seeds = [100, 200] + robot_type = 'RCCar' + use_ros = False + vg.add('seed', seeds) + vg.add('target_velocity', [targetVelocity]) + vg.add('dt', [0.1]) + vg.add('model_type', ['BrushTireModel']) + vg.add('robot_type', [robot_type]) + # We are mostly uninterested in the negative values below, since they would encourage + # violating the monitor.... + vg.add('weightForQuantMonitorValueInReward', [0.0, 2.0, 0.125, -0.125, 1.0, 0.25, 0.5, -2.0]); + vg.add('useQuantMonitorSubformulasAsFeatures', [True, False]); + + # Original monitor code: "action[1] * (np.sign(state[0]) + np.sign(state[1])) > 0" + # This monitor is violated when the car drives away from y=0... + quantMonitorSubformula_carDrivingAwayFromLine = "-action[1] * (np.sign(state[0]) + np.sign(state[1]))"; + quantitativeMonitorSubFormulas = \ + [quantMonitorSubformula_carDrivingAwayFromLine]; + + codeForFallbackController = """ +import numpy as np; +def fallbackController(observation): + y = observation[0] + yaw = observation[1] + + amountToSteerOffCenter = 0.1; # very slight to avoid oversteering... but completely + # fails to consider drifting. As such, this is VERY MUCH a first-swing attempt + # with a fallback controller that we would not actually use. 
+ velocityToUse = """ + str(targetVelocity) + """; + + # steers toward the line + steeringAngle = amountToSteerOffCenter * (np.sign(y) + np.sign(yaw)); + return np.array([steeringAngle, velocityToUse]); +"""; + + vg.add("codeForFallbackController", [codeForFallbackController, None]); + vg.add("quantitativeMonitorSubFormulas", [quantitativeMonitorSubFormulas, []]); + + print('Number of Configurations: ', len(vg.variants())) + + # Run each experiment variant + for vv in vg.variants(): + run_experiment_lite( + stub_method_call=run_task, + variant=vv, + n_parallel=2, + snapshot_mode='last', + seed=vv['seed'] + ) + + +if __name__ == '__main__': + main() +