diff --git a/learning_curve.py b/learning_curve.py index 2baa81b..ebc2145 100644 --- a/learning_curve.py +++ b/learning_curve.py @@ -21,14 +21,14 @@ def display_digits(): def train_model(): """Train a model on pictures of digits. - + Read in 8x8 pictures of numbers and evaluate the accuracy of the model when different percentages of the data are used as training data. This function plots the average accuracy of the model as a function of the percent of data used to train it. """ data = load_digits() - num_trials = 10 + num_trials = 100 train_percentages = range(5, 95, 5) test_accuracies = numpy.zeros(len(train_percentages)) @@ -38,8 +38,15 @@ def train_model(): # variability. # For consistency with the previous example use # model = LogisticRegression(C=10**-10) for your learner - - # TODO: your code here + for idx, train_size in enumerate(train_percentages): + trial_accuracies = [] + for i in range(num_trials): + X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, train_size=train_size/100) + model = LogisticRegression(C=10**-1) + model.fit(X_train, y_train) + trial_accuracies.append(model.score(X_test, y_test)) + average = sum(trial_accuracies)/num_trials + test_accuracies[idx] = average fig = plt.figure() plt.plot(train_percentages, test_accuracies) @@ -51,4 +58,4 @@ def train_model(): if __name__ == "__main__": # Feel free to comment/uncomment as needed display_digits() - # train_model() + train_model() diff --git a/questions.txt.pdf b/questions.txt.pdf new file mode 100644 index 0000000..f4878dc Binary files /dev/null and b/questions.txt.pdf differ