diff --git a/Solution.py b/Solution.py
new file mode 100644
index 00000000..b73b33d1
--- /dev/null
+++ b/Solution.py
@@ -0,0 +1,87 @@
+
+
+from sklearn.datasets import load_iris
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LogisticRegression
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.metrics import accuracy_score, classification_report
+import pandas as pd
+
+def load_data():
+    iris = load_iris()
+    X = iris.data
+    y = iris.target
+
+    df = pd.DataFrame(X, columns=iris.feature_names)
+    df["species"] = y
+
+    print("First 5 rows of data:")
+    print(df.head())
+    print("\nFeature names:", iris.feature_names)
+    print("Target names:", iris.target_names)
+    print("\nShape of data:", df.shape)
+
+    return df, iris
+
+def train_test_split_data(df):
+    X = df.drop("species", axis=1)
+    y = df["species"]
+
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=42
+    )
+
+    print("\nTrain shape:", X_train.shape, "Test shape:", X_test.shape)
+    return X_train, X_test, y_train, y_test
+
+def train_logistic_regression(X_train, y_train):
+    log_reg = LogisticRegression(max_iter=200)
+    log_reg.fit(X_train, y_train)
+    print("\nLogistic Regression training complete.")
+    return log_reg
+
+def evaluate_model(model, X_test, y_test, model_name="Model"):
+    y_pred = model.predict(X_test)
+    acc = accuracy_score(y_test, y_pred)
+
+    print(f"\n=== {model_name} Evaluation ===")
+    print("Accuracy:", acc)
+    print("\nClassification report:")
+    print(classification_report(y_test, y_pred))
+
+    print("Sample predictions vs true:")
+    for i in range(min(5, len(y_test))):
+        print(f"Predicted: {y_pred[i]}, True: {y_test.iloc[i]}")
+    return acc
+
+def train_knn(X_train, y_train, n_neighbors=5):
+    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
+    knn.fit(X_train, y_train)
+    print(f"\nKNN (k={n_neighbors}) training complete.")
+    return knn
+
+def main():
+    df, iris = load_data()
+    X_train, X_test, y_train, y_test = train_test_split_data(df)
+
+    #Logistic Regression
+    log_reg_model = train_logistic_regression(X_train, y_train)
+    log_reg_acc = evaluate_model(log_reg_model, X_test, y_test, "Logistic Regression")
+
+    #KNN
+    knn_model = train_knn(X_train, y_train, n_neighbors=5)
+    knn_acc = evaluate_model(knn_model, X_test, y_test, "KNN (k=5)")
+
+    print("\nSummary:")
+    print(f"Logistic Regression accuracy: {log_reg_acc:.4f}")
+    print(f"KNN (k=5) accuracy: {knn_acc:.4f}")
+
+    if knn_acc > log_reg_acc:
+        print("KNN performed better on this test split.")
+    elif knn_acc < log_reg_acc:
+        print("Logistic Regression performed better on this test split.")
+    else:
+        print("Both models performed equally on this test split.")
+
+if __name__ == "__main__":
+    main()
diff --git a/excercise.md b/excercise.md
new file mode 100644
index 00000000..125c0e2d
--- /dev/null
+++ b/excercise.md
@@ -0,0 +1,44 @@
+# ML Learning Project – Iris Classification (Exercise)
+
+## 1. Overview
+
+In this exercise, you will build a **Machine Learning model** to classify iris flowers into three species using the classic **Iris dataset**.
+
+You will:
+- Load and explore data
+- Split into train/test
+- Train a model
+- Evaluate performance
+- Try simple improvements
+
+---
+
+## 2. Objective
+
+> Build a classifier that predicts the iris flower species based on petal and sepal measurements.
+
+Target variable:
+- `species` (Setosa, Versicolor, Virginica)
+
+Features:
+- `sepal length`
+- `sepal width`
+- `petal length`
+- `petal width`
+
+We will use the built-in Iris dataset from **scikit-learn** (no external CSV needed).
+
+---
+
+## 3. Requirements
+
+### Tools / Libraries
+- Python 3.x
+- `scikit-learn`
+- `pandas`
+- `numpy`
+- `matplotlib` (optional, for plots)
+
+Install (if needed):
+
+```