From b89af7e0880d5da4443c15928c112f7fe99c9670 Mon Sep 17 00:00:00 2001
From: Manish
Date: Wed, 6 May 2020 03:04:09 +1000
Subject: [PATCH 1/4] Workaround for SSLError while downloading dataset

Work around the weak DH key used by www.cs.cmu.edu. The selected cipher
is supported by the server and has no known security flaw. The
workaround only runs when the user actually encounters the error, since
older systems may not raise it yet may also lack support for the
workaround cipher. Re-raise the error if it is not exactly the one we
are looking for, to avoid hiding any other possible bug.
---
 tools/startup.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/tools/startup.py b/tools/startup.py
index 4638e0d115e..c3d2bc521ab 100644
--- a/tools/startup.py
+++ b/tools/startup.py
@@ -32,7 +32,22 @@
 print "download will complete at about 423 MB"
 import urllib
 
 url = "https://www.cs.cmu.edu/~./enron/enron_mail_20150507.tar.gz"
-urllib.urlretrieve(url, filename="../enron_mail_20150507.tar.gz")
+filename = "../enron_mail_20150507.tar.gz"
+try:
+    urllib.urlretrieve(url, filename=filename)
+except IOError as socket_error:
+    expected_error = (
+        "IOError('socket error', SSLError(1, u'[SSL: DH_KEY_TOO_SMALL]"+
+        " dh key too small (_ssl.c:727)'))"
+    )
+    if repr(socket_error) == expected_error:
+        import ssl
+        cipher = "ECDHE-RSA-AES128-GCM-SHA256"
+        context = ssl.create_default_context()
+        context.set_ciphers(cipher)
+        urllib.urlretrieve(url, filename=filename, context=context)
+    else:
+        raise socket_error
 
 print "download complete!"
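
Note on PATCH 1/4: below is a minimal sketch (not part of the patch) for
probing up front whether the workaround cipher is available locally,
which is the concern the commit message raises about older systems. It
assumes Python 2.7.9 or later, where ssl.create_default_context()
exists; SSLContext.set_ciphers() raises ssl.SSLError when no matching
cipher can be selected.

    import ssl

    # Probe whether the workaround cipher from the patch is available
    # locally; set_ciphers raises SSLError when nothing matches.
    try:
        probe = ssl.create_default_context()
        probe.set_ciphers("ECDHE-RSA-AES128-GCM-SHA256")
        print "workaround cipher available"
    except ssl.SSLError:
        print "workaround cipher not supported on this system"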

From 738d07f9a65381937210f0e9dba254ef4ce98629 Mon Sep 17 00:00:00 2001
From: Manish
Date: Sun, 14 Jun 2020 23:25:14 +1000
Subject: [PATCH 2/4] Replace deprecated module

---
 outliers/outlier_removal_regression.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/outliers/outlier_removal_regression.py b/outliers/outlier_removal_regression.py
index d509cd9f22f..789eb7c139a 100644
--- a/outliers/outlier_removal_regression.py
+++ b/outliers/outlier_removal_regression.py
@@ -20,7 +20,7 @@
 ### and n_columns is the number of features
 ages = numpy.reshape( numpy.array(ages), (len(ages), 1))
 net_worths = numpy.reshape( numpy.array(net_worths), (len(net_worths), 1))
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 ages_train, ages_test, net_worths_train, net_worths_test = train_test_split(ages, net_worths, test_size=0.1, random_state=42)
 
 ### fill in a regression here!  Name the regression object reg so that

From f5c5e6c7096d8346f5bfb6729abd1c153da05539 Mon Sep 17 00:00:00 2001
From: Manish
Date: Tue, 16 Jun 2020 19:45:19 +1000
Subject: [PATCH 3/4] Replace deprecated module

---
 feature_selection/find_signature.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/feature_selection/find_signature.py b/feature_selection/find_signature.py
index c01a1f2111a..4e63f4d2967 100644
--- a/feature_selection/find_signature.py
+++ b/feature_selection/find_signature.py
@@ -19,8 +19,8 @@
 ### remainder go into training)
 ### feature matrices changed to dense representations for compatibility with
 ### classifier functions in versions 0.15.2 and earlier
-from sklearn import cross_validation
-features_train, features_test, labels_train, labels_test = cross_validation.train_test_split(word_data, authors, test_size=0.1, random_state=42)
+from sklearn import model_selection
+features_train, features_test, labels_train, labels_test = model_selection.train_test_split(word_data, authors, test_size=0.1, random_state=42)
 
 from sklearn.feature_extraction.text import TfidfVectorizer
 vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5,

From fe4f3145330fe244e644ed5e86fdb698a08bbefd Mon Sep 17 00:00:00 2001
From: Manish
Date: Tue, 16 Jun 2020 22:37:54 +1000
Subject: [PATCH 4/4] Fix deprecated modules

---
 pca/eigenfaces.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/pca/eigenfaces.py b/pca/eigenfaces.py
index 074b860a253..b9ad1ccb9f2 100644
--- a/pca/eigenfaces.py
+++ b/pca/eigenfaces.py
@@ -23,12 +23,12 @@
 import pylab as pl
 import numpy as np
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.datasets import fetch_lfw_people
-from sklearn.grid_search import GridSearchCV
+from sklearn.model_selection import GridSearchCV
 from sklearn.metrics import classification_report
 from sklearn.metrics import confusion_matrix
-from sklearn.decomposition import RandomizedPCA
+from sklearn.decomposition import PCA
 from sklearn.svm import SVC
 
 # Display progress logs on stdout
@@ -70,7 +70,10 @@
 
 print "Extracting the top %d eigenfaces from %d faces" % (n_components, X_train.shape[0])
 t0 = time()
-pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)
+pca = PCA(
+    n_components=n_components,
+    svd_solver='randomized',
+    whiten=True).fit(X_train)
 print "done in %0.3fs" % (time() - t0)
 
 eigenfaces = pca.components_.reshape((n_components, h, w))
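
Note on PATCHES 2-4: all three follow the scikit-learn 0.18+ migration,
in which cross_validation and grid_search were merged into
model_selection and RandomizedPCA was folded into PCA via
svd_solver='randomized'. Below is a minimal smoke test of the
replacement imports, assuming scikit-learn 0.18 or later; the data is
an arbitrary stand-in, not the course datasets.

    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn.decomposition import PCA

    # Arbitrary data, just to exercise the replacement APIs end to end.
    X = np.random.rand(20, 5)
    X_train, X_test = train_test_split(X, test_size=0.1, random_state=42)

    # PCA with svd_solver='randomized' is the drop-in for RandomizedPCA.
    pca = PCA(n_components=3, svd_solver='randomized', whiten=True).fit(X_train)
    print pca.components_.shape  # prints (3, 5): one row per component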