-
Notifications
You must be signed in to change notification settings - Fork 46
Open
Labels
Description
When `oob_score=True`, the classification accuracy doesn't match the accuracy obtained with `oob_score=False`, and it also varies from run to run even with `random_state` set.
# Reproduction script: fit two rerfClassifier models that differ only in
# `oob_score` and compare their accuracy on the same held-out iris split.
import pandas as pd
from rerf.rerfClassifier import rerfClassifier
from sklearn import datasets, metrics
from sklearn.model_selection import train_test_split

# Load dataset
iris = datasets.load_iris()

# Build the frame the rest of the script reads from. The column names are the
# ones selected below; the feature columns follow load_iris' column order.
data = pd.DataFrame(
    {
        "sepal length": iris.data[:, 0],
        "sepal width": iris.data[:, 1],
        "petal length": iris.data[:, 2],
        "petal width": iris.data[:, 3],
        "species": iris.target,
    }
)

X = data[["sepal length", "sepal width", "petal length", "petal width"]]  # Features
y = data["species"]  # Labels

# Split dataset into training set and test set: 70% training and 30% test.
# NOTE(review): no random_state is passed here, so the split is reshuffled on
# every run; that alone changes both accuracies between runs, independently of
# the classifiers' random_state=2. Confirm whether the reported variability
# persists with a fixed split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Identical hyperparameters and seed; only oob_score differs.
clf1 = rerfClassifier(n_estimators=10, oob_score=False, random_state=2)
clf2 = rerfClassifier(n_estimators=10, oob_score=True, random_state=2)

clf1.fit(X_train, y_train)
clf2.fit(X_train, y_train)
# Reported notebook output of the last fit call:
# rerfClassifier(feature_combinations=1.5, image_height=None, image_width=None,
#                max_depth=None, max_features='auto', min_samples_split=1,
#                n_estimators=10, n_jobs=None, oob_score=True,
#                patch_height_max=None, patch_height_min=1, patch_width_max=None,
#                patch_width_min=1, projection_matrix='RerF', random_state=2)

y_pred1 = clf1.predict(X_test)
y_pred2 = clf2.predict(X_test)

print("Accuracy:", metrics.accuracy_score(y_test, y_pred1))
print("Accuracy oob:", metrics.accuracy_score(y_test, y_pred2))
# Reported output:
# Accuracy: 0.9555555555555556
# Accuracy oob: 0.9333333333333333
Reactions are currently unavailable