From 3f708507057d9c002a34711edde25772e24b28bd Mon Sep 17 00:00:00 2001 From: Stephen Tramer Date: Wed, 19 Jun 2019 17:04:21 -0700 Subject: [PATCH] Updated Python content to comply with PEP8 (via autopep8) --- CATelcoCustomerChurnModeling.py | 18 +- CATelcoCustomerChurnModelingWithoutDprep.py | 17 +- aml_config/jupyter_notebook_config.py | 398 ++++++++++---------- churn_schema_gen.py | 61 +-- score.py | 55 +-- 5 files changed, 284 insertions(+), 265 deletions(-) diff --git a/CATelcoCustomerChurnModeling.py b/CATelcoCustomerChurnModeling.py index 4c497f5..bea9136 100644 --- a/CATelcoCustomerChurnModeling.py +++ b/CATelcoCustomerChurnModeling.py @@ -16,13 +16,14 @@ from azureml.logging import get_azureml_logger # initialize the logger -run_logger = get_azureml_logger() -run_logger.log('amlrealworld.ChurnPrediction.CATelcoCustomerChurnModeling','true') +run_logger = get_azureml_logger() +run_logger.log( + 'amlrealworld.ChurnPrediction.CATelcoCustomerChurnModeling', 'true') with Package.open_package('CATelcoCustomerChurnTrainingSample.dprep') as pkg: df = pkg.dataflows[0].get_dataframe(spark=False) -columns_to_encode = list(df.select_dtypes(include=['category','object'])) +columns_to_encode = list(df.select_dtypes(include=['category', 'object'])) for column_to_encode in columns_to_encode: dummies = pd.get_dummies(df[column_to_encode]) one_hot_col_names = [] @@ -35,7 +36,7 @@ model = GaussianNB() random_seed = 42 -train, test = train_test_split(df, random_state = random_seed, test_size = 0.3) +train, test = train_test_split(df, random_state=random_seed, test_size=0.3) target = train['churn'].values train = train.drop('churn', 1) @@ -45,21 +46,22 @@ expected = test['churn'].values test = test.drop('churn', 1) predicted = model.predict(test) -print("Naive Bayes Classification Accuracy", accuracy_score(expected, predicted)) +print("Naive Bayes Classification Accuracy", + accuracy_score(expected, predicted)) # log the Naive Bayes Accuracy run_logger.log("Naive Bayes Accuracy", accuracy_score(expected, predicted)) dt = DecisionTreeClassifier(min_samples_split=20, random_state=99) dt.fit(train, target) predicted = dt.predict(test) -print("Decision Tree Classification Accuracy", accuracy_score(expected, predicted)) +print("Decision Tree Classification Accuracy", + accuracy_score(expected, predicted)) # log the DTree Accuracy run_logger.log("DTree Accuracy", accuracy_score(expected, predicted)) # serialize the model on disk in the special 'outputs' folder -print ("Export the model to model.pkl") +print("Export the model to model.pkl") f = open('./outputs/model.pkl', 'wb') pickle.dump(dt, f) f.close() - diff --git a/CATelcoCustomerChurnModelingWithoutDprep.py b/CATelcoCustomerChurnModelingWithoutDprep.py index 55ef754..e78b4f8 100644 --- a/CATelcoCustomerChurnModelingWithoutDprep.py +++ b/CATelcoCustomerChurnModelingWithoutDprep.py @@ -13,8 +13,9 @@ from azureml.logging import get_azureml_logger # initialize the logger -run_logger = get_azureml_logger() -run_logger.log('amlrealworld.ChurnPrediction.CATelcoCustomerChurnModelingWithoutDprep','true') +run_logger = get_azureml_logger() +run_logger.log( + 'amlrealworld.ChurnPrediction.CATelcoCustomerChurnModelingWithoutDprep', 'true') # Perform Data Preparation df = pd.read_csv('data/CATelcoCustomerChurnTrainingSample.csv') @@ -24,7 +25,7 @@ df = df.drop('month', 1) # One-Hot Encoding -columns_to_encode = list(df.select_dtypes(include=['category','object'])) +columns_to_encode = list(df.select_dtypes(include=['category', 'object'])) for column_to_encode in columns_to_encode: dummies = pd.get_dummies(df[column_to_encode]) one_hot_col_names = [] @@ -37,7 +38,7 @@ model = GaussianNB() random_seed = 42 -train, test = train_test_split(df, random_state = random_seed, test_size = 0.3) +train, test = train_test_split(df, random_state=random_seed, test_size=0.3) target = train['churn'].values train = train.drop('churn', 1) @@ -48,19 +49,21 @@ expected = test['churn'].values test = test.drop('churn', 1) predicted = model.predict(test) -print("Naive Bayes Classification Accuracy", accuracy_score(expected, predicted)) +print("Naive Bayes Classification Accuracy", + accuracy_score(expected, predicted)) # Log the Naive Bayes accuracy run_logger.log("Naive Bayes Accuracy", accuracy_score(expected, predicted)) dt = DecisionTreeClassifier(min_samples_split=20, random_state=99) dt.fit(train, target) predicted = dt.predict(test) -print("Decision Tree Classification Accuracy", accuracy_score(expected, predicted)) +print("Decision Tree Classification Accuracy", + accuracy_score(expected, predicted)) # log the DTree Accuracy run_logger.log("DTree Accuracy", accuracy_score(expected, predicted)) # serialize the model on disk in the special 'outputs' folder -print ("Export the model to outputs/model.pkl") +print("Export the model to outputs/model.pkl") f = open('./outputs/model.pkl', 'wb') pickle.dump(model, f) f.close() diff --git a/aml_config/jupyter_notebook_config.py b/aml_config/jupyter_notebook_config.py index 96636aa..6fc631a 100644 --- a/aml_config/jupyter_notebook_config.py +++ b/aml_config/jupyter_notebook_config.py @@ -1,332 +1,332 @@ # Configuration file for jupyter-notebook. -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # Application(SingletonConfigurable) configuration -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ -## This is an application. +# This is an application. -## The date format used by logging formatters for %(asctime)s +# The date format used by logging formatters for %(asctime)s #c.Application.log_datefmt = '%Y-%m-%d %H:%M:%S' -## The Logging format template +# The Logging format template #c.Application.log_format = '[%(name)s]%(highlevel)s %(message)s' -## Set the log level by value or name. +# Set the log level by value or name. #c.Application.log_level = 30 -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # JupyterApp(Application) configuration -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ -## Base class for Jupyter applications +# Base class for Jupyter applications -## Answer yes to any prompts. +# Answer yes to any prompts. #c.JupyterApp.answer_yes = False -## Full path of a config file. +# Full path of a config file. #c.JupyterApp.config_file = '' -## Specify a config file to load. +# Specify a config file to load. #c.JupyterApp.config_file_name = '' -## Generate default config file. +# Generate default config file. #c.JupyterApp.generate_config = False -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # NotebookApp(JupyterApp) configuration -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ -## Set the Access-Control-Allow-Credentials: true header +# Set the Access-Control-Allow-Credentials: true header #c.NotebookApp.allow_credentials = False -## Set the Access-Control-Allow-Origin header -# +# Set the Access-Control-Allow-Origin header +# # Use '*' to allow any origin to access your server. -# +# # Takes precedence over allow_origin_pat. #c.NotebookApp.allow_origin = '' -## Use a regular expression for the Access-Control-Allow-Origin header -# +# Use a regular expression for the Access-Control-Allow-Origin header +# # Requests from an origin matching the expression will get replies with: -# +# # Access-Control-Allow-Origin: origin -# +# # where `origin` is the origin of the request. -# +# # Ignored if allow_origin is set. #c.NotebookApp.allow_origin_pat = '' -## Whether to allow the user to run the notebook as root. +# Whether to allow the user to run the notebook as root. #c.NotebookApp.allow_root = False -## DEPRECATED use base_url +# DEPRECATED use base_url #c.NotebookApp.base_project_url = '/' -## The base URL for the notebook server. -# +# The base URL for the notebook server. +# # Leading and trailing slashes can be omitted, and will automatically be added. #c.NotebookApp.base_url = '/' -## Specify what command to use to invoke a web browser when opening the notebook. +# Specify what command to use to invoke a web browser when opening the notebook. # If not specified, the default browser will be determined by the `webbrowser` # standard library module, which allows setting of the BROWSER environment # variable to override it. #c.NotebookApp.browser = '' -## The full path to an SSL/TLS certificate file. +# The full path to an SSL/TLS certificate file. #c.NotebookApp.certfile = '' -## The full path to a certificate authority certificate for SSL/TLS client +# The full path to a certificate authority certificate for SSL/TLS client # authentication. #c.NotebookApp.client_ca = '' -## The config manager class to use +# The config manager class to use #c.NotebookApp.config_manager_class = 'notebook.services.config.manager.ConfigManager' -## The notebook manager class to use. +# The notebook manager class to use. #c.NotebookApp.contents_manager_class = 'notebook.services.contents.largefilemanager.LargeFileManager' -## Extra keyword arguments to pass to `set_secure_cookie`. See tornado's +# Extra keyword arguments to pass to `set_secure_cookie`. See tornado's # set_secure_cookie docs for details. #c.NotebookApp.cookie_options = {} -## The random bytes used to secure cookies. By default this is a new random +# The random bytes used to secure cookies. By default this is a new random # number every time you start the Notebook. Set it to a value in a config file # to enable logins to persist across server sessions. -# +# # Note: Cookie secrets should be kept private, do not share config files with # cookie_secret stored in plaintext (you can read the value from a file). #c.NotebookApp.cookie_secret = b'' -## The file where the cookie secret is stored. +# The file where the cookie secret is stored. #c.NotebookApp.cookie_secret_file = '' -## The default URL to redirect to from `/` +# The default URL to redirect to from `/` #c.NotebookApp.default_url = '/tree' -## Disable cross-site-request-forgery protection -# +# Disable cross-site-request-forgery protection +# # Jupyter notebook 4.3.1 introduces protection from cross-site request # forgeries, requiring API requests to either: -# +# # - originate from pages served by this server (validated with XSRF cookie and # token), or - authenticate with a token -# +# # Some anonymous compute resources still desire the ability to run code, # completely without authentication. These services can disable all # authentication and security checks, with the full knowledge of what that # implies. #c.NotebookApp.disable_check_xsrf = False -## Whether to enable MathJax for typesetting math/TeX -# +# Whether to enable MathJax for typesetting math/TeX +# # MathJax is the javascript library Jupyter uses to render math/LaTeX. It is # very large, so you may want to disable it if you have a slow internet # connection, or for offline use of the notebook. -# +# # When disabled, equations etc. will appear as their untransformed TeX source. #c.NotebookApp.enable_mathjax = True -## extra paths to look for Javascript notebook extensions +# extra paths to look for Javascript notebook extensions #c.NotebookApp.extra_nbextensions_path = [] -## Extra paths to search for serving static files. -# +# Extra paths to search for serving static files. +# # This allows adding javascript/css to be available from the notebook server # machine, or overriding individual files in the IPython #c.NotebookApp.extra_static_paths = [] -## Extra paths to search for serving jinja templates. -# +# Extra paths to search for serving jinja templates. +# # Can be used to override templates from notebook.templates. #c.NotebookApp.extra_template_paths = [] -## +## #c.NotebookApp.file_to_run = '' -## Deprecated: Use minified JS file or not, mainly use during dev to avoid JS +# Deprecated: Use minified JS file or not, mainly use during dev to avoid JS # recompilation #c.NotebookApp.ignore_minified_js = False -## (bytes/sec) Maximum rate at which messages can be sent on iopub before they +# (bytes/sec) Maximum rate at which messages can be sent on iopub before they # are limited. #c.NotebookApp.iopub_data_rate_limit = 1000000 -## (msgs/sec) Maximum rate at which messages can be sent on iopub before they are +# (msgs/sec) Maximum rate at which messages can be sent on iopub before they are # limited. #c.NotebookApp.iopub_msg_rate_limit = 1000 -## The IP address the notebook server will listen on. +# The IP address the notebook server will listen on. #c.NotebookApp.ip = 'localhost' -## Supply extra arguments that will be passed to Jinja environment. +# Supply extra arguments that will be passed to Jinja environment. #c.NotebookApp.jinja_environment_options = {} -## Extra variables to supply to jinja templates when rendering. +# Extra variables to supply to jinja templates when rendering. #c.NotebookApp.jinja_template_vars = {} -## The kernel manager class to use. +# The kernel manager class to use. #c.NotebookApp.kernel_manager_class = 'notebook.services.kernels.kernelmanager.MappingKernelManager' -## The kernel spec manager class to use. Should be a subclass of +# The kernel spec manager class to use. Should be a subclass of # `jupyter_client.kernelspec.KernelSpecManager`. -# +# # The Api of KernelSpecManager is provisional and might change without warning # between this version of Jupyter and the next stable one. #c.NotebookApp.kernel_spec_manager_class = 'jupyter_client.kernelspec.KernelSpecManager' -## The full path to a private key file for usage with SSL/TLS. +# The full path to a private key file for usage with SSL/TLS. #c.NotebookApp.keyfile = '' -## The login handler class to use. +# The login handler class to use. #c.NotebookApp.login_handler_class = 'notebook.auth.login.LoginHandler' -## The logout handler class to use. +# The logout handler class to use. #c.NotebookApp.logout_handler_class = 'notebook.auth.logout.LogoutHandler' -## The MathJax.js configuration file that is to be used. +# The MathJax.js configuration file that is to be used. #c.NotebookApp.mathjax_config = 'TeX-AMS-MML_HTMLorMML-full,Safe' -## A custom url for MathJax.js. Should be in the form of a case-sensitive url to +# A custom url for MathJax.js. Should be in the form of a case-sensitive url to # MathJax, for example: /static/components/MathJax/MathJax.js #c.NotebookApp.mathjax_url = '' -## Dict of Python modules to load as notebook server extensions.Entry values can +# Dict of Python modules to load as notebook server extensions.Entry values can # be used to enable and disable the loading ofthe extensions. The extensions # will be loaded in alphabetical order. #c.NotebookApp.nbserver_extensions = {} -## The directory to use for notebooks and kernels. +# The directory to use for notebooks and kernels. #c.NotebookApp.notebook_dir = '' -## Whether to open in a browser after starting. The specific browser used is +# Whether to open in a browser after starting. The specific browser used is # platform dependent and determined by the python standard library `webbrowser` # module, unless it is overridden using the --browser (NotebookApp.browser) # configuration option. #c.NotebookApp.open_browser = True -## Hashed password to use for web authentication. -# +# Hashed password to use for web authentication. +# # To generate, type in a python/IPython shell: -# +# # from notebook.auth import passwd; passwd() -# +# # The string should be of the form type:salt:hashed-password. #c.NotebookApp.password = '' -## Forces users to use a password for the Notebook server. This is useful in a +# Forces users to use a password for the Notebook server. This is useful in a # multi user environment, for instance when everybody in the LAN can access each # other's machine though ssh. -# +# # In such a case, server the notebook server on localhost is not secure since # any user can connect to the notebook server via ssh. #c.NotebookApp.password_required = False -## The port the notebook server will listen on. +# The port the notebook server will listen on. #c.NotebookApp.port = 8888 -## The number of additional ports to try if the specified port is not available. +# The number of additional ports to try if the specified port is not available. #c.NotebookApp.port_retries = 50 -## DISABLED: use %pylab or %matplotlib in the notebook to enable matplotlib. +# DISABLED: use %pylab or %matplotlib in the notebook to enable matplotlib. #c.NotebookApp.pylab = 'disabled' -## (sec) Time window used to check the message and data rate limits. +# (sec) Time window used to check the message and data rate limits. #c.NotebookApp.rate_limit_window = 3 -## Reraise exceptions encountered loading server extensions? +# Reraise exceptions encountered loading server extensions? #c.NotebookApp.reraise_server_extension_failures = False -## DEPRECATED use the nbserver_extensions dict instead +# DEPRECATED use the nbserver_extensions dict instead #c.NotebookApp.server_extensions = [] -## The session manager class to use. +# The session manager class to use. #c.NotebookApp.session_manager_class = 'notebook.services.sessions.sessionmanager.SessionManager' -## Supply SSL options for the tornado HTTPServer. See the tornado docs for +# Supply SSL options for the tornado HTTPServer. See the tornado docs for # details. #c.NotebookApp.ssl_options = {} -## Supply overrides for terminado. Currently only supports "shell_command". +# Supply overrides for terminado. Currently only supports "shell_command". #c.NotebookApp.terminado_settings = {} -## Token used for authenticating first-time connections to the server. -# +# Token used for authenticating first-time connections to the server. +# # When no password is enabled, the default is to generate a new, random token. -# +# # Setting to an empty string disables authentication altogether, which is NOT # RECOMMENDED. #c.NotebookApp.token = '' -## Supply overrides for the tornado.web.Application that the Jupyter notebook +# Supply overrides for the tornado.web.Application that the Jupyter notebook # uses. #c.NotebookApp.tornado_settings = {} -## Whether to trust or not X-Scheme/X-Forwarded-Proto and X-Real-Ip/X-Forwarded- +# Whether to trust or not X-Scheme/X-Forwarded-Proto and X-Real-Ip/X-Forwarded- # For headerssent by the upstream reverse proxy. Necessary if the proxy handles # SSL #c.NotebookApp.trust_xheaders = False -## DEPRECATED, use tornado_settings +# DEPRECATED, use tornado_settings #c.NotebookApp.webapp_settings = {} -## The base URL for websockets, if it differs from the HTTP server (hint: it +# The base URL for websockets, if it differs from the HTTP server (hint: it # almost certainly doesn't). -# +# # Should be in the form of an HTTP origin: ws[s]://hostname[:port] #c.NotebookApp.websocket_url = '' -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # ConnectionFileMixin(LoggingConfigurable) configuration -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ -## Mixin for configurable classes that work with connection files +# Mixin for configurable classes that work with connection files -## JSON file in which to store connection info [default: kernel-.json] -# +# JSON file in which to store connection info [default: kernel-.json] +# # This file will contain the IP, ports, and authentication key needed to connect # clients to this kernel. By default, this file will be created in the security # dir of the current profile, but can be specified by absolute path. #c.ConnectionFileMixin.connection_file = '' -## set the control (ROUTER) port [default: random] +# set the control (ROUTER) port [default: random] #c.ConnectionFileMixin.control_port = 0 -## set the heartbeat port [default: random] +# set the heartbeat port [default: random] #c.ConnectionFileMixin.hb_port = 0 -## set the iopub (PUB) port [default: random] +# set the iopub (PUB) port [default: random] #c.ConnectionFileMixin.iopub_port = 0 -## Set the kernel's IP address [default localhost]. If the IP address is +# Set the kernel's IP address [default localhost]. If the IP address is # something other than localhost, then Consoles on other machines will be able # to connect to the Kernel, so be careful! #c.ConnectionFileMixin.ip = '' -## set the shell (ROUTER) port [default: random] +# set the shell (ROUTER) port [default: random] #c.ConnectionFileMixin.shell_port = 0 -## set the stdin (ROUTER) port [default: random] +# set the stdin (ROUTER) port [default: random] #c.ConnectionFileMixin.stdin_port = 0 -## +## #c.ConnectionFileMixin.transport = 'tcp' -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # KernelManager(ConnectionFileMixin) configuration -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ -## Manages a single kernel in a subprocess on this host. -# +# Manages a single kernel in a subprocess on this host. +# # This version starts kernels with Popen. -## Should we autorestart the kernel if it dies. +# Should we autorestart the kernel if it dies. #c.KernelManager.autorestart = True -## DEPRECATED: Use kernel_name instead. -# +# DEPRECATED: Use kernel_name instead. +# # The Popen Command to launch the kernel. Override this if you have a custom # kernel. If kernel_cmd is specified in a configuration file, Jupyter does not # pass any arguments to the kernel, because it cannot make any assumptions about @@ -335,35 +335,35 @@ # line. #c.KernelManager.kernel_cmd = [] -## Time to wait for a kernel to terminate before killing it, in seconds. +# Time to wait for a kernel to terminate before killing it, in seconds. #c.KernelManager.shutdown_wait_time = 5.0 -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # Session(Configurable) configuration -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ -## Object for handling serialization and sending of messages. -# +# Object for handling serialization and sending of messages. +# # The Session object handles building messages and sending them with ZMQ sockets # or ZMQStream objects. Objects can communicate with each other over the # network via Session objects, and only need to work with the dict-based IPython # message spec. The Session will handle serialization/deserialization, security, # and metadata. -# +# # Sessions support configurable serialization via packer/unpacker traits, and # signing with HMAC digests via the key/keyfile traits. -# +# # Parameters ---------- -# +# # debug : bool # whether to trigger extra debugging statements # packer/unpacker : str : 'json', 'pickle' or import_string # importstrings for methods to serialize message parts. If just # 'json' or 'pickle', predefined JSON and pickle packers will be used. # Otherwise, the entire importstring must be used. -# +# # The functions must accept at least valid JSON input, and output *bytes*. -# +# # For example, to use msgpack: # packer = 'msgpack.packb', unpacker='msgpack.unpackb' # pack/unpack : callables @@ -379,158 +379,158 @@ # The file containing a key. If this is set, `key` will be initialized # to the contents of the file. -## Threshold (in bytes) beyond which an object's buffer should be extracted to +# Threshold (in bytes) beyond which an object's buffer should be extracted to # avoid pickling. #c.Session.buffer_threshold = 1024 -## Whether to check PID to protect against calls after fork. -# +# Whether to check PID to protect against calls after fork. +# # This check can be disabled if fork-safety is handled elsewhere. #c.Session.check_pid = True -## Threshold (in bytes) beyond which a buffer should be sent without copying. +# Threshold (in bytes) beyond which a buffer should be sent without copying. #c.Session.copy_threshold = 65536 -## Debug output in the Session +# Debug output in the Session #c.Session.debug = False -## The maximum number of digests to remember. -# +# The maximum number of digests to remember. +# # The digest history will be culled when it exceeds this value. #c.Session.digest_history_size = 65536 -## The maximum number of items for a container to be introspected for custom +# The maximum number of items for a container to be introspected for custom # serialization. Containers larger than this are pickled outright. #c.Session.item_threshold = 64 -## execution key, for signing messages. +# execution key, for signing messages. #c.Session.key = b'' -## path to file containing execution key. +# path to file containing execution key. #c.Session.keyfile = '' -## Metadata dictionary, which serves as the default top-level metadata dict for +# Metadata dictionary, which serves as the default top-level metadata dict for # each message. #c.Session.metadata = {} -## The name of the packer for serializing messages. Should be one of 'json', +# The name of the packer for serializing messages. Should be one of 'json', # 'pickle', or an import name for a custom callable serializer. #c.Session.packer = 'json' -## The UUID identifying this session. +# The UUID identifying this session. #c.Session.session = '' -## The digest scheme used to construct the message signatures. Must have the form +# The digest scheme used to construct the message signatures. Must have the form # 'hmac-HASH'. #c.Session.signature_scheme = 'hmac-sha256' -## The name of the unpacker for unserializing messages. Only used with custom +# The name of the unpacker for unserializing messages. Only used with custom # functions for `packer`. #c.Session.unpacker = 'json' -## Username for the Session. Default is your system username. +# Username for the Session. Default is your system username. #c.Session.username = 'username' -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # MultiKernelManager(LoggingConfigurable) configuration -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ -## A class for managing multiple kernels. +# A class for managing multiple kernels. -## The name of the default kernel to start +# The name of the default kernel to start #c.MultiKernelManager.default_kernel_name = 'python3' -## The kernel manager class. This is configurable to allow subclassing of the +# The kernel manager class. This is configurable to allow subclassing of the # KernelManager for customized behavior. #c.MultiKernelManager.kernel_manager_class = 'jupyter_client.ioloop.IOLoopKernelManager' -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # MappingKernelManager(MultiKernelManager) configuration -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ -## A KernelManager that handles notebook mapping and HTTP error handling +# A KernelManager that handles notebook mapping and HTTP error handling -## +## #c.MappingKernelManager.root_dir = '' -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # ContentsManager(LoggingConfigurable) configuration -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ -## Base class for serving files and directories. -# +# Base class for serving files and directories. +# # This serves any text or binary file, as well as directories, with special # handling for JSON notebook documents. -# +# # Most APIs take a path argument, which is always an API-style unicode path, and # always refers to a directory. -# +# # - unicode, not url-escaped # - '/'-separated # - leading and trailing '/' will be stripped # - if unspecified, path defaults to '', # indicating the root path. -## +## #c.ContentsManager.checkpoints = None -## +## #c.ContentsManager.checkpoints_class = 'notebook.services.contents.checkpoints.Checkpoints' -## +## #c.ContentsManager.checkpoints_kwargs = {} -## Glob patterns to hide in file and directory listings. +# Glob patterns to hide in file and directory listings. #c.ContentsManager.hide_globs = ['__pycache__', '*.pyc', '*.pyo', '.DS_Store', '*.so', '*.dylib', '*~'] -## Python callable or importstring thereof -# +# Python callable or importstring thereof +# # To be called on a contents model prior to save. -# +# # This can be used to process the structure, such as removing notebook outputs # or other side effects that should not be saved. -# +# # It will be called as (all arguments passed by keyword):: -# +# # hook(path=path, model=model, contents_manager=self) -# +# # - model: the model to be saved. Includes file contents. # Modifying this dict will affect the file that is stored. # - path: the API path of the save destination # - contents_manager: this ContentsManager instance #c.ContentsManager.pre_save_hook = None -## +## #c.ContentsManager.root_dir = '/' -## The base name used when creating untitled directories. +# The base name used when creating untitled directories. #c.ContentsManager.untitled_directory = 'Untitled Folder' -## The base name used when creating untitled files. +# The base name used when creating untitled files. #c.ContentsManager.untitled_file = 'untitled' -## The base name used when creating untitled notebooks. +# The base name used when creating untitled notebooks. #c.ContentsManager.untitled_notebook = 'Untitled' -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # FileManagerMixin(Configurable) configuration -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ -## Mixin for ContentsAPI classes that interact with the filesystem. -# +# Mixin for ContentsAPI classes that interact with the filesystem. +# # Provides facilities for reading, writing, and copying both notebooks and # generic files. -# +# # Shared by FileContentsManager and FileCheckpoints. -# +# # Note ---- Classes using this mixin must provide the following attributes: -# +# # root_dir : unicode # A directory against against which API-style paths are to be resolved. -# +# # log : logging.Logger -## By default notebooks are saved on disk on a temporary file and then if +# By default notebooks are saved on disk on a temporary file and then if # succefully written, it replaces the old ones. This procedure, namely # 'atomic_writing', causes some bugs on file system whitout operation order # enforcement (like some networked fs). If set to False, the new notebook is @@ -538,67 +538,73 @@ # ) #c.FileManagerMixin.use_atomic_writing = True -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # FileContentsManager(FileManagerMixin,ContentsManager) configuration -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ -## Python callable or importstring thereof -# +# Python callable or importstring thereof +# # to be called on the path of a file just saved. -# +# # This can be used to process the file on disk, such as converting the notebook # to a script or HTML via nbconvert. -# +# # It will be called as (all arguments passed by keyword):: -# +# # hook(os_path=os_path, model=model, contents_manager=instance) -# +# # - path: the filesystem path to the file just written - model: the model # representing the file - contents_manager: this ContentsManager instance #c.FileContentsManager.post_save_hook = None -## +## #c.FileContentsManager.root_dir = '' -## DEPRECATED, use post_save_hook. Will be removed in Notebook 5.0 +# DEPRECATED, use post_save_hook. Will be removed in Notebook 5.0 #c.FileContentsManager.save_script = False -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # NotebookNotary(LoggingConfigurable) configuration -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ -## A class for computing and verifying notebook signatures. +# A class for computing and verifying notebook signatures. -## The hashing algorithm used to sign notebooks. +# The hashing algorithm used to sign notebooks. #c.NotebookNotary.algorithm = 'sha256' -## The sqlite file in which to store notebook signatures. By default, this will +# The sqlite file in which to store notebook signatures. By default, this will # be in your Jupyter data directory. You can set it to ':memory:' to disable # sqlite writing to the filesystem. #c.NotebookNotary.db_file = '' -## The secret key with which notebooks are signed. +# The secret key with which notebooks are signed. #c.NotebookNotary.secret = b'' -## The file where the secret key is stored. +# The file where the secret key is stored. #c.NotebookNotary.secret_file = '' -## A callable returning the storage backend for notebook signatures. The default +# A callable returning the storage backend for notebook signatures. The default # uses an SQLite database. #c.NotebookNotary.store_factory = traitlets.Undefined -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # KernelSpecManager(LoggingConfigurable) configuration -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ -## If there is no Python kernelspec registered and the IPython kernel is +# If there is no Python kernelspec registered and the IPython kernel is # available, ensure it is added to the spec list. #c.KernelSpecManager.ensure_native_kernel = True -## The kernel spec class. This is configurable to allow subclassing of the +# The kernel spec class. This is configurable to allow subclassing of the # KernelSpecManager for customized behavior. #c.KernelSpecManager.kernel_spec_class = 'jupyter_client.kernelspec.KernelSpec' +# Whitelist of allowed kernel names. +# +# By default, all installed kernels are allowed. +#c.KernelSpecManager.whitelist = set() +Manager.kernel_spec_class = 'jupyter_client.kernelspec.KernelSpec' + ## Whitelist of allowed kernel names. # # By default, all installed kernels are allowed. diff --git a/churn_schema_gen.py b/churn_schema_gen.py index 70b37c9..f851d04 100644 --- a/churn_schema_gen.py +++ b/churn_schema_gen.py @@ -9,6 +9,8 @@ # Prepare the web service definition by authoring # init() and run() functions. Test the fucntions # before deploying the web service. + + def init(): from sklearn.externals import joblib @@ -16,6 +18,7 @@ def init(): global model model = joblib.load('model.pkl') + def run(input_df): import json input_df_encoded = input_df @@ -23,37 +26,36 @@ def run(input_df): input_df_encoded = input_df_encoded.drop('year', 1) input_df_encoded = input_df_encoded.drop('month', 1) input_df_encoded = input_df_encoded.drop('churn', 1) - - columns_encoded = ['age', 'annualincome', 'calldroprate', 'callfailurerate', 'callingnum', - 'customerid', 'monthlybilledamount', 'numberofcomplaints', - 'numberofmonthunpaid', 'numdayscontractequipmentplanexpiring', - 'penaltytoswitch', 'totalminsusedinlastmonth', 'unpaidbalance', - 'percentagecalloutsidenetwork', 'totalcallduration', 'avgcallduration', - 'churn', 'customersuspended_No', 'customersuspended_Yes', - 'education_Bachelor or equivalent', 'education_High School or below', - 'education_Master or equivalent', 'education_PhD or equivalent', - 'gender_Female', 'gender_Male', 'homeowner_No', 'homeowner_Yes', - 'maritalstatus_Married', 'maritalstatus_Single', 'noadditionallines_\\N', - 'occupation_Non-technology Related Job', 'occupation_Others', - 'occupation_Technology Related Job', 'state_AK', 'state_AL', 'state_AR', - 'state_AZ', 'state_CA', 'state_CO', 'state_CT', 'state_DE', 'state_FL', - 'state_GA', 'state_HI', 'state_IA', 'state_ID', 'state_IL', 'state_IN', - 'state_KS', 'state_KY', 'state_LA', 'state_MA', 'state_MD', 'state_ME', - 'state_MI', 'state_MN', 'state_MO', 'state_MS', 'state_MT', 'state_NC', - 'state_ND', 'state_NE', 'state_NH', 'state_NJ', 'state_NM', 'state_NV', - 'state_NY', 'state_OH', 'state_OK', 'state_OR', 'state_PA', 'state_RI', - 'state_SC', 'state_SD', 'state_TN', 'state_TX', 'state_UT', 'state_VA', - 'state_VT', 'state_WA', 'state_WI', 'state_WV', 'state_WY', - 'usesinternetservice_No', 'usesinternetservice_Yes', - 'usesvoiceservice_No', 'usesvoiceservice_Yes'] - + 'customerid', 'monthlybilledamount', 'numberofcomplaints', + 'numberofmonthunpaid', 'numdayscontractequipmentplanexpiring', + 'penaltytoswitch', 'totalminsusedinlastmonth', 'unpaidbalance', + 'percentagecalloutsidenetwork', 'totalcallduration', 'avgcallduration', + 'churn', 'customersuspended_No', 'customersuspended_Yes', + 'education_Bachelor or equivalent', 'education_High School or below', + 'education_Master or equivalent', 'education_PhD or equivalent', + 'gender_Female', 'gender_Male', 'homeowner_No', 'homeowner_Yes', + 'maritalstatus_Married', 'maritalstatus_Single', 'noadditionallines_\\N', + 'occupation_Non-technology Related Job', 'occupation_Others', + 'occupation_Technology Related Job', 'state_AK', 'state_AL', 'state_AR', + 'state_AZ', 'state_CA', 'state_CO', 'state_CT', 'state_DE', 'state_FL', + 'state_GA', 'state_HI', 'state_IA', 'state_ID', 'state_IL', 'state_IN', + 'state_KS', 'state_KY', 'state_LA', 'state_MA', 'state_MD', 'state_ME', + 'state_MI', 'state_MN', 'state_MO', 'state_MS', 'state_MT', 'state_NC', + 'state_ND', 'state_NE', 'state_NH', 'state_NJ', 'state_NM', 'state_NV', + 'state_NY', 'state_OH', 'state_OK', 'state_OR', 'state_PA', 'state_RI', + 'state_SC', 'state_SD', 'state_TN', 'state_TX', 'state_UT', 'state_VA', + 'state_VT', 'state_WA', 'state_WI', 'state_WV', 'state_WY', + 'usesinternetservice_No', 'usesinternetservice_Yes', + 'usesvoiceservice_No', 'usesvoiceservice_Yes'] + for column_encoded in columns_encoded: if not column_encoded in input_df.columns: input_df_encoded[column_encoded] = 0 - columns_to_encode = ['customersuspended', 'education', 'gender', 'homeowner', 'maritalstatus', 'noadditionallines', 'occupation', 'state', 'usesinternetservice', 'usesvoiceservice'] + columns_to_encode = ['customersuspended', 'education', 'gender', 'homeowner', 'maritalstatus', + 'noadditionallines', 'occupation', 'state', 'usesinternetservice', 'usesvoiceservice'] for column_to_encode in columns_to_encode: dummies = pandas.get_dummies(input_df[column_to_encode]) one_hot_col_names = [] @@ -61,16 +63,19 @@ def run(input_df): one_hot_col_names.append(column_to_encode + '_' + col_name) input_df_encoded[column_to_encode + '_' + col_name] = 1 input_df_encoded = input_df_encoded.drop(column_to_encode, 1) - + pred = model.predict(input_df_encoded) return json.dumps(str(pred[0])) -df = pandas.DataFrame(data=[[12,168147,0.06,0,4251078442,1,'Yes','Bachelor or equivalent','Male','Yes','Single',71,'\\N',0,7,96,'Technology Related Job',371,'WA',15,19,'No','No',0.82,5971,663,0,2015,1]], columns=['age' , 'annualincome' , 'calldroprate' , 'callfailurerate' , 'callingnum' , 'customerid' , 'customersuspended' , 'education' , 'gender' , 'homeowner' , 'maritalstatus' , 'monthlybilledamount' , 'noadditionallines' , 'numberofcomplaints' , 'numberofmonthunpaid' , 'numdayscontractequipmentplanexpiring' , 'occupation' , 'penaltytoswitch' , 'state' , 'totalminsusedinlastmonth' , 'unpaidbalance' , 'usesinternetservice' , 'usesvoiceservice' , 'percentagecalloutsidenetwork' , 'totalcallduration' , 'avgcallduration' , 'churn' , 'year' , 'month']) + +df = pandas.DataFrame(data=[[12, 168147, 0.06, 0, 4251078442, 1, 'Yes', 'Bachelor or equivalent', 'Male', 'Yes', 'Single', 71, '\\N', 0, 7, 96, 'Technology Related Job', 371, 'WA', 15, 19, 'No', 'No', 0.82, 5971, 663, 0, 2015, 1]], columns=['age', 'annualincome', 'calldroprate', 'callfailurerate', 'callingnum', 'customerid', 'customersuspended', 'education', 'gender', 'homeowner', + 'maritalstatus', 'monthlybilledamount', 'noadditionallines', 'numberofcomplaints', 'numberofmonthunpaid', 'numdayscontractequipmentplanexpiring', 'occupation', 'penaltytoswitch', 'state', 'totalminsusedinlastmonth', 'unpaidbalance', 'usesinternetservice', 'usesvoiceservice', 'percentagecalloutsidenetwork', 'totalcallduration', 'avgcallduration', 'churn', 'year', 'month']) df.dtypes df init() -input1 = pandas.DataFrame(data=[[12,168147,0.06,0,4251078442,1,'Yes','Bachelor or equivalent','Male','Yes','Single',71,'\\N',0,7,96,'Technology Related Job',371,'WA',15,19,'No','No',0.82,5971,663,0,2015,1]], columns=['age' , 'annualincome' , 'calldroprate' , 'callfailurerate' , 'callingnum' , 'customerid' , 'customersuspended' , 'education' , 'gender' , 'homeowner' , 'maritalstatus' , 'monthlybilledamount' , 'noadditionallines' , 'numberofcomplaints' , 'numberofmonthunpaid' , 'numdayscontractequipmentplanexpiring' , 'occupation' , 'penaltytoswitch' , 'state' , 'totalminsusedinlastmonth' , 'unpaidbalance' , 'usesinternetservice' , 'usesvoiceservice' , 'percentagecalloutsidenetwork' , 'totalcallduration' , 'avgcallduration' , 'churn' , 'year' , 'month']) +input1 = pandas.DataFrame(data=[[12, 168147, 0.06, 0, 4251078442, 1, 'Yes', 'Bachelor or equivalent', 'Male', 'Yes', 'Single', 71, '\\N', 0, 7, 96, 'Technology Related Job', 371, 'WA', 15, 19, 'No', 'No', 0.82, 5971, 663, 0, 2015, 1]], columns=['age', 'annualincome', 'calldroprate', 'callfailurerate', 'callingnum', 'customerid', 'customersuspended', 'education', 'gender', 'homeowner', + 'maritalstatus', 'monthlybilledamount', 'noadditionallines', 'numberofcomplaints', 'numberofmonthunpaid', 'numdayscontractequipmentplanexpiring', 'occupation', 'penaltytoswitch', 'state', 'totalminsusedinlastmonth', 'unpaidbalance', 'usesinternetservice', 'usesvoiceservice', 'percentagecalloutsidenetwork', 'totalcallduration', 'avgcallduration', 'churn', 'year', 'month']) run(input1) inputs = {"input_df": SampleDefinition(DataTypes.PANDAS, df)} diff --git a/score.py b/score.py index dc9cee1..7170c68 100644 --- a/score.py +++ b/score.py @@ -8,6 +8,8 @@ # Prepare the web service definition by authoring # init() and run() functions. Test the functions # before deploying the web service. + + def init(): from sklearn.externals import joblib @@ -15,6 +17,7 @@ def init(): global model model = joblib.load('model.pkl') + def run(input_df): import json input_df_encoded = input_df @@ -22,37 +25,36 @@ def run(input_df): input_df_encoded = input_df_encoded.drop('year', 1) input_df_encoded = input_df_encoded.drop('month', 1) input_df_encoded = input_df_encoded.drop('churn', 1) - - columns_encoded = ['age', 'annualincome', 'calldroprate', 'callfailurerate', 'callingnum', - 'customerid', 'monthlybilledamount', 'numberofcomplaints', - 'numberofmonthunpaid', 'numdayscontractequipmentplanexpiring', - 'penaltytoswitch', 'totalminsusedinlastmonth', 'unpaidbalance', - 'percentagecalloutsidenetwork', 'totalcallduration', 'avgcallduration', - 'churn', 'customersuspended_No', 'customersuspended_Yes', - 'education_Bachelor or equivalent', 'education_High School or below', - 'education_Master or equivalent', 'education_PhD or equivalent', - 'gender_Female', 'gender_Male', 'homeowner_No', 'homeowner_Yes', - 'maritalstatus_Married', 'maritalstatus_Single', 'noadditionallines_\\N', - 'occupation_Non-technology Related Job', 'occupation_Others', - 'occupation_Technology Related Job', 'state_AK', 'state_AL', 'state_AR', - 'state_AZ', 'state_CA', 'state_CO', 'state_CT', 'state_DE', 'state_FL', - 'state_GA', 'state_HI', 'state_IA', 'state_ID', 'state_IL', 'state_IN', - 'state_KS', 'state_KY', 'state_LA', 'state_MA', 'state_MD', 'state_ME', - 'state_MI', 'state_MN', 'state_MO', 'state_MS', 'state_MT', 'state_NC', - 'state_ND', 'state_NE', 'state_NH', 'state_NJ', 'state_NM', 'state_NV', - 'state_NY', 'state_OH', 'state_OK', 'state_OR', 'state_PA', 'state_RI', - 'state_SC', 'state_SD', 'state_TN', 'state_TX', 'state_UT', 'state_VA', - 'state_VT', 'state_WA', 'state_WI', 'state_WV', 'state_WY', - 'usesinternetservice_No', 'usesinternetservice_Yes', - 'usesvoiceservice_No', 'usesvoiceservice_Yes'] - + 'customerid', 'monthlybilledamount', 'numberofcomplaints', + 'numberofmonthunpaid', 'numdayscontractequipmentplanexpiring', + 'penaltytoswitch', 'totalminsusedinlastmonth', 'unpaidbalance', + 'percentagecalloutsidenetwork', 'totalcallduration', 'avgcallduration', + 'churn', 'customersuspended_No', 'customersuspended_Yes', + 'education_Bachelor or equivalent', 'education_High School or below', + 'education_Master or equivalent', 'education_PhD or equivalent', + 'gender_Female', 'gender_Male', 'homeowner_No', 'homeowner_Yes', + 'maritalstatus_Married', 'maritalstatus_Single', 'noadditionallines_\\N', + 'occupation_Non-technology Related Job', 'occupation_Others', + 'occupation_Technology Related Job', 'state_AK', 'state_AL', 'state_AR', + 'state_AZ', 'state_CA', 'state_CO', 'state_CT', 'state_DE', 'state_FL', + 'state_GA', 'state_HI', 'state_IA', 'state_ID', 'state_IL', 'state_IN', + 'state_KS', 'state_KY', 'state_LA', 'state_MA', 'state_MD', 'state_ME', + 'state_MI', 'state_MN', 'state_MO', 'state_MS', 'state_MT', 'state_NC', + 'state_ND', 'state_NE', 'state_NH', 'state_NJ', 'state_NM', 'state_NV', + 'state_NY', 'state_OH', 'state_OK', 'state_OR', 'state_PA', 'state_RI', + 'state_SC', 'state_SD', 'state_TN', 'state_TX', 'state_UT', 'state_VA', + 'state_VT', 'state_WA', 'state_WI', 'state_WV', 'state_WY', + 'usesinternetservice_No', 'usesinternetservice_Yes', + 'usesvoiceservice_No', 'usesvoiceservice_Yes'] + for column_encoded in columns_encoded: if not column_encoded in input_df.columns: input_df_encoded[column_encoded] = 0 - columns_to_encode = ['customersuspended', 'education', 'gender', 'homeowner', 'maritalstatus', 'noadditionallines', 'occupation', 'state', 'usesinternetservice', 'usesvoiceservice'] + columns_to_encode = ['customersuspended', 'education', 'gender', 'homeowner', 'maritalstatus', + 'noadditionallines', 'occupation', 'state', 'usesinternetservice', 'usesvoiceservice'] for column_to_encode in columns_to_encode: dummies = pandas.get_dummies(input_df[column_to_encode]) one_hot_col_names = [] @@ -60,10 +62,11 @@ def run(input_df): one_hot_col_names.append(column_to_encode + '_' + col_name) input_df_encoded[column_to_encode + '_' + col_name] = 1 input_df_encoded = input_df_encoded.drop(column_to_encode, 1) - + pred = model.predict(input_df_encoded) return json.dumps(str(pred[0])) + # Implement test code to run in IDE or Azure ML Workbench if __name__ == '__main__': init()